5585d3cc24c6a78319b4c0e0af219cb35eebcd39
[moodle.git] / lib / markdown / Markdown.php
1 <?php
2 #
3 # Markdown  -  A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown  
6 # Copyright (c) 2004-2015 Michel Fortin  
7 # <https://michelf.ca/projects/php-markdown/>
8 #
9 # Original Markdown  
10 # Copyright (c) 2004-2006 John Gruber  
11 # <http://daringfireball.net/projects/markdown/>
12 #
13 namespace Michelf;
16 #
17 # Markdown Parser Class
18 #
20 class Markdown implements MarkdownInterface {
22         ### Version ###
24         const  MARKDOWNLIB_VERSION  =  "1.5.0";
26         ### Simple Function Interface ###
28         public static function defaultTransform($text) {
29         #
30         # Initialize the parser and return the result of its transform method.
31         # This will work fine for derived classes too.
32         #
33                 # Take parser class on which this function was called.
34                 $parser_class = \get_called_class();
36                 # try to take parser from the static parser list
37                 static $parser_list;
38                 $parser =& $parser_list[$parser_class];
40                 # create the parser it not already set
41                 if (!$parser)
42                         $parser = new $parser_class;
44                 # Transform text using parser.
45                 return $parser->transform($text);
46         }
48         ### Configuration Variables ###
        # Suffix appended when emitting empty elements (used for <hr and <br).
        # Change to ">" for HTML output.
        public $empty_element_suffix = " />";
        # Tab width in columns. Link definitions and HTML blocks may be
        # indented by at most ($tab_width - 1) spaces and still be recognised.
        public $tab_width = 4;
        
        # Change to `true` to disallow markup or entities.
        # When $no_markup is true, hashHTMLBlocks() returns its input untouched.
        public $no_markup = false;
        public $no_entities = false;
        
        # Predefined urls and titles for reference links and images.
        # setup() copies them into the working tables before each transform.
        public $predef_urls = array();
        public $predef_titles = array();
        # Optional filter function for URLs
        public $url_filter_func = null;
        # Optional header id="" generation callback function.
        public $header_id_func = null;
        # Class attribute to toggle "enhanced ordered list" behaviour
        # setting this to true will allow ordered lists to start from the index
        # number that is defined first.  For example:
        # 2. List item two
        # 3. List item three
        # 
        # becomes
        # <ol start="2">
        # <li>List item two</li>
        # <li>List item three</li>
        # </ol>
        public $enhanced_ordered_list = false;
81         ### Parser Implementation ###
        # Regex to match balanced [brackets].
        # Needed to insert a maximum bracket depth while converting to PHP.
        # The *_re members are built in the constructor by repeating a
        # nesting fragment *_depth times.
        protected $nested_brackets_depth = 6;
        protected $nested_brackets_re;
        
        # Same idea for balanced (parentheses) inside link and image URLs.
        protected $nested_url_parenthesis_depth = 4;
        protected $nested_url_parenthesis_re;
        # Table of hash values for escaped characters:
        # $escape_chars_re is a regex character class generated from
        # $escape_chars (via preg_quote) in the constructor.
        protected $escape_chars = '\`*_{}[]()>#+-.!';
        protected $escape_chars_re;
96         public function __construct() {
97         #
98         # Constructor function. Initialize appropriate member variables.
99         #
100                 $this->_initDetab();
101                 $this->prepareItalicsAndBold();
102         
103                 $this->nested_brackets_re = 
104                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
105                         str_repeat('\])*', $this->nested_brackets_depth);
106         
107                 $this->nested_url_parenthesis_re = 
108                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
109                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
110                 
111                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
112                 
113                 # Sort document, block, and span gamut in ascendent priority order.
114                 asort($this->document_gamut);
115                 asort($this->block_gamut);
116                 asort($this->span_gamut);
117         }
        # Internal hashes used during transformation.
        # $urls and $titles are keyed by lower-cased link id; $html_hashes maps
        # a hash token (see hashPart) to the text it stands for. All three are
        # reset by setup() and emptied again by teardown().
        protected $urls = array();
        protected $titles = array();
        protected $html_hashes = array();
        
        # Status flag to avoid invalid nesting.
        # doAnchors() sets it while it runs and returns early when it is set.
        protected $in_anchor = false;
127         
128         
129         protected function setup() {
130         #
131         # Called before the transformation process starts to setup parser 
132         # states.
133         #
134                 # Clear global hashes.
135                 $this->urls = $this->predef_urls;
136                 $this->titles = $this->predef_titles;
137                 $this->html_hashes = array();
138                 
139                 $this->in_anchor = false;
140         }
141         
142         protected function teardown() {
143         #
144         # Called after the transformation process to clear any variable 
145         # which may be taking up memory unnecessarly.
146         #
147                 $this->urls = array();
148                 $this->titles = array();
149                 $this->html_hashes = array();
150         }
153         public function transform($text) {
154         #
155         # Main function. Performs some preprocessing on the input text
156         # and pass it through the document gamut.
157         #
158                 $this->setup();
159         
160                 # Remove UTF-8 BOM and marker character in input, if present.
161                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
163                 # Standardize line endings:
164                 #   DOS to Unix and Mac to Unix
165                 $text = preg_replace('{\r\n?}', "\n", $text);
167                 # Make sure $text ends with a couple of newlines:
168                 $text .= "\n\n";
170                 # Convert all tabs to spaces.
171                 $text = $this->detab($text);
173                 # Turn block-level HTML blocks into hash entries
174                 $text = $this->hashHTMLBlocks($text);
176                 # Strip any lines consisting only of spaces and tabs.
177                 # This makes subsequent regexen easier to write, because we can
178                 # match consecutive blank lines with /\n+/ instead of something
179                 # contorted like /[ ]*\n+/ .
180                 $text = preg_replace('/^[ ]+$/m', '', $text);
182                 # Run document gamut methods.
183                 foreach ($this->document_gamut as $method => $priority) {
184                         $text = $this->$method($text);
185                 }
186                 
187                 $this->teardown();
189                 return $text . "\n";
190         }
191         
        # Document-level passes run by transform(): method name => priority.
        # The constructor sorts the table by ascending priority (asort).
        protected $document_gamut = array(
                # Strip link definitions, store in hashes.
                "stripLinkDefinitions" => 20,
                
                # Block-level transformations on the whole document.
                "runBasicBlockGamut"   => 30,
                );
200         protected function stripLinkDefinitions($text) {
201         #
202         # Strips link definitions from text, stores the URLs and titles in
203         # hash references.
204         #
205                 $less_than_tab = $this->tab_width - 1;
207                 # Link defs are in the form: ^[id]: url "optional title"
208                 $text = preg_replace_callback('{
209                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
210                                                           [ ]*
211                                                           \n?                           # maybe *one* newline
212                                                           [ ]*
213                                                         (?:
214                                                           <(.+?)>                       # url = $2
215                                                         |
216                                                           (\S+?)                        # url = $3
217                                                         )
218                                                           [ ]*
219                                                           \n?                           # maybe one newline
220                                                           [ ]*
221                                                         (?:
222                                                                 (?<=\s)                 # lookbehind for whitespace
223                                                                 ["(]
224                                                                 (.*?)                   # title = $4
225                                                                 [")]
226                                                                 [ ]*
227                                                         )?      # title is optional
228                                                         (?:\n+|\Z)
229                         }xm',
230                         array($this, '_stripLinkDefinitions_callback'),
231                         $text);
232                 return $text;
233         }
234         protected function _stripLinkDefinitions_callback($matches) {
235                 $link_id = strtolower($matches[1]);
236                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
237                 $this->urls[$link_id] = $url;
238                 $this->titles[$link_id] =& $matches[4];
239                 return ''; # String that will replace the block
240         }
243         protected function hashHTMLBlocks($text) {
244                 if ($this->no_markup)  return $text;
246                 $less_than_tab = $this->tab_width - 1;
248                 # Hashify HTML blocks:
249                 # We only want to do this for block-level HTML tags, such as headers,
250                 # lists, and tables. That's because we still want to wrap <p>s around
251                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
252                 # phrase emphasis, and spans. The list of tags we're looking for is
253                 # hard-coded:
254                 #
255                 # *  List "a" is made of tags which can be both inline or block-level.
256                 #    These will be treated block-level when the start tag is alone on 
257                 #    its line, otherwise they're not matched here and will be taken as 
258                 #    inline later.
259                 # *  List "b" is made of tags which are always block-level;
260                 #
261                 $block_tags_a_re = 'ins|del';
262                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
263                                                    'script|noscript|style|form|fieldset|iframe|math|svg|'.
264                                                    'article|section|nav|aside|hgroup|header|footer|'.
265                                                    'figure';
267                 # Regular expression for the content of a block tag.
268                 $nested_tags_level = 4;
269                 $attr = '
270                         (?>                             # optional tag attributes
271                           \s                    # starts with whitespace
272                           (?>
273                                 [^>"/]+         # text outside quotes
274                           |
275                                 /+(?!>)         # slash not followed by ">"
276                           |
277                                 "[^"]*"         # text inside double quotes (tolerate ">")
278                           |
279                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
280                           )*
281                         )?      
282                         ';
283                 $content =
284                         str_repeat('
285                                 (?>
286                                   [^<]+                 # content without tag
287                                 |
288                                   <\2                   # nested opening tag
289                                         '.$attr.'       # attributes
290                                         (?>
291                                           />
292                                         |
293                                           >', $nested_tags_level).      # end of opening tag
294                                           '.*?'.                                        # last level nested tag content
295                         str_repeat('
296                                           </\2\s*>      # closing nested tag
297                                         )
298                                   |                             
299                                         <(?!/\2\s*>     # other tags with a different name
300                                   )
301                                 )*',
302                                 $nested_tags_level);
303                 $content2 = str_replace('\2', '\3', $content);
305                 # First, look for nested blocks, e.g.:
306                 #       <div>
307                 #               <div>
308                 #               tags for inner block must be indented.
309                 #               </div>
310                 #       </div>
311                 #
312                 # The outermost tags must start at the left margin for this to match, and
313                 # the inner nested divs must be indented.
314                 # We need to do this before the next, more liberal match, because the next
315                 # match will start at the first `<div>` and stop at the first `</div>`.
316                 $text = preg_replace_callback('{(?>
317                         (?>
318                                 (?<=\n)                 # Starting on its own line
319                                 |                               # or
320                                 \A\n?                   # the at beginning of the doc
321                         )
322                         (                                               # save in $1
324                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
325                           # in between.
326                                         
327                                                 [ ]{0,'.$less_than_tab.'}
328                                                 <('.$block_tags_b_re.')# start tag = $2
329                                                 '.$attr.'>                      # attributes followed by > and \n
330                                                 '.$content.'            # content, support nesting
331                                                 </\2>                           # the matching end tag
332                                                 [ ]*                            # trailing spaces/tabs
333                                                 (?=\n+|\Z)      # followed by a newline or end of document
335                         | # Special version for tags of group a.
337                                                 [ ]{0,'.$less_than_tab.'}
338                                                 <('.$block_tags_a_re.')# start tag = $3
339                                                 '.$attr.'>[ ]*\n        # attributes followed by >
340                                                 '.$content2.'           # content, support nesting
341                                                 </\3>                           # the matching end tag
342                                                 [ ]*                            # trailing spaces/tabs
343                                                 (?=\n+|\Z)      # followed by a newline or end of document
344                                         
345                         | # Special case just for <hr />. It was easier to make a special 
346                           # case than to make the other regex more complicated.
347                         
348                                                 [ ]{0,'.$less_than_tab.'}
349                                                 <(hr)                           # start tag = $2
350                                                 '.$attr.'                       # attributes
351                                                 /?>                                     # the matching end tag
352                                                 [ ]*
353                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
354                         
355                         | # Special case for standalone HTML comments:
356                         
357                                         [ ]{0,'.$less_than_tab.'}
358                                         (?s:
359                                                 <!-- .*? -->
360                                         )
361                                         [ ]*
362                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
363                         
364                         | # PHP and ASP-style processor instructions (<? and <%)
365                         
366                                         [ ]{0,'.$less_than_tab.'}
367                                         (?s:
368                                                 <([?%])                 # $2
369                                                 .*?
370                                                 \2>
371                                         )
372                                         [ ]*
373                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
374                                         
375                         )
376                         )}Sxmi',
377                         array($this, '_hashHTMLBlocks_callback'),
378                         $text);
380                 return $text;
381         }
382         protected function _hashHTMLBlocks_callback($matches) {
383                 $text = $matches[1];
384                 $key  = $this->hashBlock($text);
385                 return "\n\n$key\n\n";
386         }
387         
388         
389         protected function hashPart($text, $boundary = 'X') {
390         #
391         # Called whenever a tag must be hashed when a function insert an atomic 
392         # element in the text stream. Passing $text to through this function gives
393         # a unique text-token which will be reverted back when calling unhash.
394         #
395         # The $boundary argument specify what character should be used to surround
396         # the token. By convension, "B" is used for block elements that needs not
397         # to be wrapped into paragraph tags at the end, ":" is used for elements
398         # that are word separators and "X" is used in the general case.
399         #
400                 # Swap back any tag hash found in $text so we do not have to `unhash`
401                 # multiple times at the end.
402                 $text = $this->unhash($text);
403                 
404                 # Then hash the block.
405                 static $i = 0;
406                 $key = "$boundary\x1A" . ++$i . $boundary;
407                 $this->html_hashes[$key] = $text;
408                 return $key; # String that will replace the tag.
409         }
412         protected function hashBlock($text) {
413         #
414         # Shortcut function for hashPart with block-level boundaries.
415         #
416                 return $this->hashPart($text, 'B');
417         }
        protected $block_gamut = array(
        #
        # These are all the transformations that form block-level
        # tags like paragraphs, headers, and list items.
        #
        # Each entry maps a method name to its priority. The constructor
        # sorts the table by ascending priority and runBasicBlockGamut calls
        # the methods in that order.
        #
                "doHeaders"         => 10,
                "doHorizontalRules" => 20,
                
                "doLists"           => 40,
                "doCodeBlocks"      => 50,
                "doBlockQuotes"     => 60,
                );
433         protected function runBlockGamut($text) {
434         #
435         # Run block gamut tranformations.
436         #
437                 # We need to escape raw HTML in Markdown source before doing anything 
438                 # else. This need to be done for each block, and not only at the 
439                 # begining in the Markdown function since hashed blocks can be part of
440                 # list items and could have been indented. Indented blocks would have 
441                 # been seen as a code block in a previous pass of hashHTMLBlocks.
442                 $text = $this->hashHTMLBlocks($text);
443                 
444                 return $this->runBasicBlockGamut($text);
445         }
446         
447         protected function runBasicBlockGamut($text) {
448         #
449         # Run block gamut tranformations, without hashing HTML blocks. This is 
450         # useful when HTML blocks are known to be already hashed, like in the first
451         # whole-document pass.
452         #
453                 foreach ($this->block_gamut as $method => $priority) {
454                         $text = $this->$method($text);
455                 }
456                 
457                 # Finally form paragraph and restore hashed blocks.
458                 $text = $this->formParagraphs($text);
460                 return $text;
461         }
462         
463         
464         protected function doHorizontalRules($text) {
465                 # Do Horizontal Rules:
466                 return preg_replace(
467                         '{
468                                 ^[ ]{0,3}       # Leading space
469                                 ([-*_])         # $1: First marker
470                                 (?>                     # Repeated marker group
471                                         [ ]{0,2}        # Zero, one, or two spaces.
472                                         \1                      # Marker character
473                                 ){2,}           # Group repeated at least twice
474                                 [ ]*            # Tailing spaces
475                                 $                       # End of line.
476                         }mx',
477                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
478                         $text);
479         }
        protected $span_gamut = array(
        #
        # These are all the transformations that occur *within* block-level
        # tags like paragraphs, headers, and list items.
        #
        # Each entry maps a method name to its priority. The constructor
        # sorts the table by ascending priority and runSpanGamut calls the
        # methods in that order.
        #
                # Process character escapes, code spans, and inline HTML
                # in one shot.
                "parseSpan"           => -30,
                # Process anchor and image tags. Images must come first,
                # because ![foo][f] looks like an anchor.
                "doImages"            =>  10,
                "doAnchors"           =>  20,
                
                # Make links out of things like `<http://example.com/>`
                # Must come after doAnchors, because you can use < and >
                # delimiters in inline links like [this](<url>).
                "doAutoLinks"         =>  30,
                "encodeAmpsAndAngles" =>  40,
                "doItalicsAndBold"    =>  50,
                "doHardBreaks"        =>  60,
                );
506         protected function runSpanGamut($text) {
507         #
508         # Run span gamut tranformations.
509         #
510                 foreach ($this->span_gamut as $method => $priority) {
511                         $text = $this->$method($text);
512                 }
514                 return $text;
515         }
516         
517         
518         protected function doHardBreaks($text) {
519                 # Do hard breaks:
520                 return preg_replace_callback('/ {2,}\n/', 
521                         array($this, '_doHardBreaks_callback'), $text);
522         }
523         protected function _doHardBreaks_callback($matches) {
524                 return $this->hashPart("<br$this->empty_element_suffix\n");
525         }
528         protected function doAnchors($text) {
529         #
530         # Turn Markdown link shortcuts into XHTML <a> tags.
531         #
532                 if ($this->in_anchor) return $text;
533                 $this->in_anchor = true;
534                 
535                 #
536                 # First, handle reference-style links: [link text] [id]
537                 #
538                 $text = preg_replace_callback('{
539                         (                                       # wrap whole match in $1
540                           \[
541                                 ('.$this->nested_brackets_re.') # link text = $2
542                           \]
544                           [ ]?                          # one optional space
545                           (?:\n[ ]*)?           # one optional newline followed by spaces
547                           \[
548                                 (.*?)           # id = $3
549                           \]
550                         )
551                         }xs',
552                         array($this, '_doAnchors_reference_callback'), $text);
554                 #
555                 # Next, inline-style links: [link text](url "optional title")
556                 #
557                 $text = preg_replace_callback('{
558                         (                               # wrap whole match in $1
559                           \[
560                                 ('.$this->nested_brackets_re.') # link text = $2
561                           \]
562                           \(                    # literal paren
563                                 [ \n]*
564                                 (?:
565                                         <(.+?)> # href = $3
566                                 |
567                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
568                                 )
569                                 [ \n]*
570                                 (                       # $5
571                                   ([\'"])       # quote char = $6
572                                   (.*?)         # Title = $7
573                                   \6            # matching quote
574                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
575                                 )?                      # title is optional
576                           \)
577                         )
578                         }xs',
579                         array($this, '_doAnchors_inline_callback'), $text);
581                 #
582                 # Last, handle reference-style shortcuts: [link text]
583                 # These must come last in case you've also got [link text][1]
584                 # or [link text](/foo)
585                 #
586                 $text = preg_replace_callback('{
587                         (                                       # wrap whole match in $1
588                           \[
589                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
590                           \]
591                         )
592                         }xs',
593                         array($this, '_doAnchors_reference_callback'), $text);
595                 $this->in_anchor = false;
596                 return $text;
597         }
598         protected function _doAnchors_reference_callback($matches) {
599                 $whole_match =  $matches[1];
600                 $link_text   =  $matches[2];
601                 $link_id     =& $matches[3];
603                 if ($link_id == "") {
604                         # for shortcut links like [this][] or [this].
605                         $link_id = $link_text;
606                 }
607                 
608                 # lower-case and turn embedded newlines into spaces
609                 $link_id = strtolower($link_id);
610                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
612                 if (isset($this->urls[$link_id])) {
613                         $url = $this->urls[$link_id];
614                         $url = $this->encodeURLAttribute($url);
615                         
616                         $result = "<a href=\"$url\"";
617                         if ( isset( $this->titles[$link_id] ) ) {
618                                 $title = $this->titles[$link_id];
619                                 $title = $this->encodeAttribute($title);
620                                 $result .=  " title=\"$title\"";
621                         }
622                 
623                         $link_text = $this->runSpanGamut($link_text);
624                         $result .= ">$link_text</a>";
625                         $result = $this->hashPart($result);
626                 }
627                 else {
628                         $result = $whole_match;
629                 }
630                 return $result;
631         }
632         protected function _doAnchors_inline_callback($matches) {
633                 $whole_match    =  $matches[1];
634                 $link_text              =  $this->runSpanGamut($matches[2]);
635                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
636                 $title                  =& $matches[7];
638                 // if the URL was of the form <s p a c e s> it got caught by the HTML
639                 // tag parser and hashed. Need to reverse the process before using the URL.
640                 $unhashed = $this->unhash($url);
641                 if ($unhashed != $url)
642                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
644                 $url = $this->encodeURLAttribute($url);
646                 $result = "<a href=\"$url\"";
647                 if (isset($title)) {
648                         $title = $this->encodeAttribute($title);
649                         $result .=  " title=\"$title\"";
650                 }
651                 
652                 $link_text = $this->runSpanGamut($link_text);
653                 $result .= ">$link_text</a>";
655                 return $this->hashPart($result);
656         }
659         protected function doImages($text) {
660         #
661         # Turn Markdown image shortcuts into <img> tags.
662         #
663                 #
664                 # First, handle reference-style labeled images: ![alt text][id]
665                 #
666                 $text = preg_replace_callback('{
667                         (                               # wrap whole match in $1
668                           !\[
669                                 ('.$this->nested_brackets_re.')         # alt text = $2
670                           \]
672                           [ ]?                          # one optional space
673                           (?:\n[ ]*)?           # one optional newline followed by spaces
675                           \[
676                                 (.*?)           # id = $3
677                           \]
679                         )
680                         }xs', 
681                         array($this, '_doImages_reference_callback'), $text);
683                 #
684                 # Next, handle inline images:  ![alt text](url "optional title")
685                 # Don't forget: encode * and _
686                 #
687                 $text = preg_replace_callback('{
688                         (                               # wrap whole match in $1
689                           !\[
690                                 ('.$this->nested_brackets_re.')         # alt text = $2
691                           \]
692                           \s?                   # One optional whitespace character
693                           \(                    # literal paren
694                                 [ \n]*
695                                 (?:
696                                         <(\S*)> # src url = $3
697                                 |
698                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
699                                 )
700                                 [ \n]*
701                                 (                       # $5
702                                   ([\'"])       # quote char = $6
703                                   (.*?)         # title = $7
704                                   \6            # matching quote
705                                   [ \n]*
706                                 )?                      # title is optional
707                           \)
708                         )
709                         }xs',
710                         array($this, '_doImages_inline_callback'), $text);
712                 return $text;
713         }
714         protected function _doImages_reference_callback($matches) {
715                 $whole_match = $matches[1];
716                 $alt_text    = $matches[2];
717                 $link_id     = strtolower($matches[3]);
719                 if ($link_id == "") {
720                         $link_id = strtolower($alt_text); # for shortcut links like ![this][].
721                 }
723                 $alt_text = $this->encodeAttribute($alt_text);
724                 if (isset($this->urls[$link_id])) {
725                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
726                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
727                         if (isset($this->titles[$link_id])) {
728                                 $title = $this->titles[$link_id];
729                                 $title = $this->encodeAttribute($title);
730                                 $result .=  " title=\"$title\"";
731                         }
732                         $result .= $this->empty_element_suffix;
733                         $result = $this->hashPart($result);
734                 }
735                 else {
736                         # If there's no such link ID, leave intact:
737                         $result = $whole_match;
738                 }
740                 return $result;
741         }
742         protected function _doImages_inline_callback($matches) {
743                 $whole_match    = $matches[1];
744                 $alt_text               = $matches[2];
745                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
746                 $title                  =& $matches[7];
748                 $alt_text = $this->encodeAttribute($alt_text);
749                 $url = $this->encodeURLAttribute($url);
750                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
751                 if (isset($title)) {
752                         $title = $this->encodeAttribute($title);
753                         $result .=  " title=\"$title\""; # $title already quoted
754                 }
755                 $result .= $this->empty_element_suffix;
757                 return $this->hashPart($result);
758         }
761         protected function doHeaders($text) {
762                 # Setext-style headers:
763                 #         Header 1
764                 #         ========
765                 #  
766                 #         Header 2
767                 #         --------
768                 #
769                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
770                         array($this, '_doHeaders_callback_setext'), $text);
772                 # atx-style headers:
773                 #       # Header 1
774                 #       ## Header 2
775                 #       ## Header 2 with closing hashes ##
776                 #       ...
777                 #       ###### Header 6
778                 #
779                 $text = preg_replace_callback('{
780                                 ^(\#{1,6})      # $1 = string of #\'s
781                                 [ ]*
782                                 (.+?)           # $2 = Header text
783                                 [ ]*
784                                 \#*                     # optional closing #\'s (not counted)
785                                 \n+
786                         }xm',
787                         array($this, '_doHeaders_callback_atx'), $text);
789                 return $text;
790         }
792         protected function _doHeaders_callback_setext($matches) {
793                 # Terrible hack to check we haven't found an empty list item.
794                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
795                         return $matches[0];
796                 
797                 $level = $matches[2]{0} == '=' ? 1 : 2;
799                 # id attribute generation
800                 $idAtt = $this->_generateIdFromHeaderValue($matches[1]);
802                 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>";
803                 return "\n" . $this->hashBlock($block) . "\n\n";
804         }
805         protected function _doHeaders_callback_atx($matches) {
807                 # id attribute generation
808                 $idAtt = $this->_generateIdFromHeaderValue($matches[2]);
810                 $level = strlen($matches[1]);
811                 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>";
812                 return "\n" . $this->hashBlock($block) . "\n\n";
813         }
815         protected function _generateIdFromHeaderValue($headerValue) {
817                 # if a header_id_func property is set, we can use it to automatically
818                 # generate an id attribute.
819                 #
820                 # This method returns a string in the form id="foo", or an empty string
821                 # otherwise.
822                 if (!is_callable($this->header_id_func)) {
823                         return "";
824                 }
825                 $idValue = call_user_func($this->header_id_func, $headerValue);
826                 if (!$idValue) return "";
828                 return ' id="' . $this->encodeAttribute($idValue) . '"';
830         }
832         protected function doLists($text) {
833         #
834         # Form HTML ordered (numbered) and unordered (bulleted) lists.
835         #
836                 $less_than_tab = $this->tab_width - 1;
838                 # Re-usable patterns to match list item bullets and number markers:
839                 $marker_ul_re  = '[*+-]';
840                 $marker_ol_re  = '\d+[\.]';
842                 $markers_relist = array(
843                         $marker_ul_re => $marker_ol_re,
844                         $marker_ol_re => $marker_ul_re,
845                         );
847                 foreach ($markers_relist as $marker_re => $other_marker_re) {
848                         # Re-usable pattern to match any entirel ul or ol list:
849                         $whole_list_re = '
850                                 (                                                               # $1 = whole list
851                                   (                                                             # $2
852                                         ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
853                                         ('.$marker_re.')                        # $4 = first list item marker
854                                         [ ]+
855                                   )
856                                   (?s:.+?)
857                                   (                                                             # $5
858                                           \z
859                                         |
860                                           \n{2,}
861                                           (?=\S)
862                                           (?!                                           # Negative lookahead for another list item marker
863                                                 [ ]*
864                                                 '.$marker_re.'[ ]+
865                                           )
866                                         |
867                                           (?=                                           # Lookahead for another kind of list
868                                             \n
869                                                 \3                                              # Must have the same indentation
870                                                 '.$other_marker_re.'[ ]+
871                                           )
872                                   )
873                                 )
874                         '; // mx
875                         
876                         # We use a different prefix before nested lists than top-level lists.
877                         # See extended comment in _ProcessListItems().
878                 
879                         if ($this->list_level) {
880                                 $text = preg_replace_callback('{
881                                                 ^
882                                                 '.$whole_list_re.'
883                                         }mx',
884                                         array($this, '_doLists_callback'), $text);
885                         }
886                         else {
887                                 $text = preg_replace_callback('{
888                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
889                                                 '.$whole_list_re.'
890                                         }mx',
891                                         array($this, '_doLists_callback'), $text);
892                         }
893                 }
895                 return $text;
896         }
897         protected function _doLists_callback($matches) {
898                 # Re-usable patterns to match list item bullets and number markers:
899                 $marker_ul_re  = '[*+-]';
900                 $marker_ol_re  = '\d+[\.]';
901                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
902                 $marker_ol_start_re = '[0-9]+';
904                 $list = $matches[1];
905                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
907                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
909                 $list .= "\n";
910                 $result = $this->processListItems($list, $marker_any_re);
912                 $ol_start = 1;
913                 if ($this->enhanced_ordered_list) {
914                         # Get the start number for ordered list.
915                         if ($list_type == 'ol') {
916                                 $ol_start_array = array();
917                                 $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
918                                 if ($ol_start_check){
919                                         $ol_start = $ol_start_array[0];
920                                 }
921                         }
922                 }
924                 if ($ol_start > 1 && $list_type == 'ol'){
925                         $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
926                 } else {
927                         $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
928                 }
929                 return "\n". $result ."\n\n";
930         }
932         protected $list_level = 0;
934         protected function processListItems($list_str, $marker_any_re) {
935         #
936         #       Process the contents of a single ordered or unordered list, splitting it
937         #       into individual list items.
938         #
939                 # The $this->list_level global keeps track of when we're inside a list.
940                 # Each time we enter a list, we increment it; when we leave a list,
941                 # we decrement. If it's zero, we're not in a list anymore.
942                 #
943                 # We do this because when we're not inside a list, we want to treat
944                 # something like this:
945                 #
946                 #               I recommend upgrading to version
947                 #               8. Oops, now this line is treated
948                 #               as a sub-list.
949                 #
950                 # As a single paragraph, despite the fact that the second line starts
951                 # with a digit-period-space sequence.
952                 #
953                 # Whereas when we're inside a list (or sub-list), that line will be
954                 # treated as the start of a sub-list. What a kludge, huh? This is
955                 # an aspect of Markdown's syntax that's hard to parse perfectly
956                 # without resorting to mind-reading. Perhaps the solution is to
957                 # change the syntax rules such that sub-lists must start with a
958                 # starting cardinal number; e.g. "1." or "a.".
959                 
960                 $this->list_level++;
962                 # trim trailing blank lines:
963                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
965                 $list_str = preg_replace_callback('{
966                         (\n)?                                                   # leading line = $1
967                         (^[ ]*)                                                 # leading whitespace = $2
968                         ('.$marker_any_re.'                             # list marker and space = $3
969                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
970                         )
971                         ((?s:.*?))                                              # list item text   = $4
972                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
973                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
974                         }xm',
975                         array($this, '_processListItems_callback'), $list_str);
977                 $this->list_level--;
978                 return $list_str;
979         }
980         protected function _processListItems_callback($matches) {
981                 $item = $matches[4];
982                 $leading_line =& $matches[1];
983                 $leading_space =& $matches[2];
984                 $marker_space = $matches[3];
985                 $tailing_blank_line =& $matches[5];
987                 if ($leading_line || $tailing_blank_line || 
988                         preg_match('/\n{2,}/', $item))
989                 {
990                         # Replace marker with the appropriate whitespace indentation
991                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
992                         $item = $this->runBlockGamut($this->outdent($item)."\n");
993                 }
994                 else {
995                         # Recursion for sub-lists:
996                         $item = $this->doLists($this->outdent($item));
997                         $item = preg_replace('/\n+$/', '', $item);
998                         $item = $this->runSpanGamut($item);
999                 }
1001                 return "<li>" . $item . "</li>\n";
1002         }
1005         protected function doCodeBlocks($text) {
1006         #
1007         #       Process Markdown `<pre><code>` blocks.
1008         #
1009                 $text = preg_replace_callback('{
1010                                 (?:\n\n|\A\n?)
1011                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1012                                   (?>
1013                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1014                                         .*\n+
1015                                   )+
1016                                 )
1017                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1018                         }xm',
1019                         array($this, '_doCodeBlocks_callback'), $text);
1021                 return $text;
1022         }
1023         protected function _doCodeBlocks_callback($matches) {
1024                 $codeblock = $matches[1];
1026                 $codeblock = $this->outdent($codeblock);
1027                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1029                 # trim leading newlines and trailing newlines
1030                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1032                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1033                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
1034         }
1037         protected function makeCodeSpan($code) {
1038         #
1039         # Create a code span markup for $code. Called from handleSpanToken.
1040         #
1041                 $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1042                 return $this->hashPart("<code>$code</code>");
1043         }
        # Emphasis token patterns. Each table is keyed by the marker that is
        # currently open ('' when none is); doItalicsAndBold tracks that state
        # in its $em and $strong variables.
        #
        # The '' entries match a marker run of the right length, not adjacent
        # to another marker character of the same kind and not followed by
        # optional punctuation (. , : ;) plus whitespace. The other entries
        # match the same marker when it is not preceded by whitespace or by
        # the marker character, and not followed by the marker character.

        # Single-character emphasis markers: * and _
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?!_)',
                );
        # Two-character strong markers: ** and __
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?!_)',
                );
        # Three-character combined markers: *** and ___
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?!_)',
                );
        # Combined token expressions keyed by "$em$strong"; filled in by
        # prepareItalicsAndBold() and read by doItalicsAndBold().
        protected $em_strong_prepared_relist;
1062         
1063         protected function prepareItalicsAndBold() {
1064         #
1065         # Prepare regular expressions for searching emphasis tokens in any
1066         # context.
1067         #
1068                 foreach ($this->em_relist as $em => $em_re) {
1069                         foreach ($this->strong_relist as $strong => $strong_re) {
1070                                 # Construct list of allowed token expressions.
1071                                 $token_relist = array();
1072                                 if (isset($this->em_strong_relist["$em$strong"])) {
1073                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1074                                 }
1075                                 $token_relist[] = $em_re;
1076                                 $token_relist[] = $strong_re;
1077                                 
1078                                 # Construct master expression from list.
1079                                 $token_re = '{('. implode('|', $token_relist) .')}';
1080                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1081                         }
1082                 }
1083         }
1084         
1085         protected function doItalicsAndBold($text) {
1086                 $token_stack = array('');
1087                 $text_stack = array('');
1088                 $em = '';
1089                 $strong = '';
1090                 $tree_char_em = false;
1091                 
1092                 while (1) {
1093                         #
1094                         # Get prepared regular expression for seraching emphasis tokens
1095                         # in current context.
1096                         #
1097                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1098                         
1099                         #
1100                         # Each loop iteration search for the next emphasis token. 
1101                         # Each token is then passed to handleSpanToken.
1102                         #
1103                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1104                         $text_stack[0] .= $parts[0];
1105                         $token =& $parts[1];
1106                         $text =& $parts[2];
1107                         
1108                         if (empty($token)) {
1109                                 # Reached end of text span: empty stack without emitting.
1110                                 # any more emphasis.
1111                                 while ($token_stack[0]) {
1112                                         $text_stack[1] .= array_shift($token_stack);
1113                                         $text_stack[0] .= array_shift($text_stack);
1114                                 }
1115                                 break;
1116                         }
1117                         
1118                         $token_len = strlen($token);
1119                         if ($tree_char_em) {
1120                                 # Reached closing marker while inside a three-char emphasis.
1121                                 if ($token_len == 3) {
1122                                         # Three-char closing marker, close em and strong.
1123                                         array_shift($token_stack);
1124                                         $span = array_shift($text_stack);
1125                                         $span = $this->runSpanGamut($span);
1126                                         $span = "<strong><em>$span</em></strong>";
1127                                         $text_stack[0] .= $this->hashPart($span);
1128                                         $em = '';
1129                                         $strong = '';
1130                                 } else {
1131                                         # Other closing marker: close one em or strong and
1132                                         # change current token state to match the other
1133                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1134                                         $tag = $token_len == 2 ? "strong" : "em";
1135                                         $span = $text_stack[0];
1136                                         $span = $this->runSpanGamut($span);
1137                                         $span = "<$tag>$span</$tag>";
1138                                         $text_stack[0] = $this->hashPart($span);
1139                                         $$tag = ''; # $$tag stands for $em or $strong
1140                                 }
1141                                 $tree_char_em = false;
1142                         } else if ($token_len == 3) {
1143                                 if ($em) {
1144                                         # Reached closing marker for both em and strong.
1145                                         # Closing strong marker:
1146                                         for ($i = 0; $i < 2; ++$i) {
1147                                                 $shifted_token = array_shift($token_stack);
1148                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1149                                                 $span = array_shift($text_stack);
1150                                                 $span = $this->runSpanGamut($span);
1151                                                 $span = "<$tag>$span</$tag>";
1152                                                 $text_stack[0] .= $this->hashPart($span);
1153                                                 $$tag = ''; # $$tag stands for $em or $strong
1154                                         }
1155                                 } else {
1156                                         # Reached opening three-char emphasis marker. Push on token 
1157                                         # stack; will be handled by the special condition above.
1158                                         $em = $token{0};
1159                                         $strong = "$em$em";
1160                                         array_unshift($token_stack, $token);
1161                                         array_unshift($text_stack, '');
1162                                         $tree_char_em = true;
1163                                 }
1164                         } else if ($token_len == 2) {
1165                                 if ($strong) {
1166                                         # Unwind any dangling emphasis marker:
1167                                         if (strlen($token_stack[0]) == 1) {
1168                                                 $text_stack[1] .= array_shift($token_stack);
1169                                                 $text_stack[0] .= array_shift($text_stack);
1170                                         }
1171                                         # Closing strong marker:
1172                                         array_shift($token_stack);
1173                                         $span = array_shift($text_stack);
1174                                         $span = $this->runSpanGamut($span);
1175                                         $span = "<strong>$span</strong>";
1176                                         $text_stack[0] .= $this->hashPart($span);
1177                                         $strong = '';
1178                                 } else {
1179                                         array_unshift($token_stack, $token);
1180                                         array_unshift($text_stack, '');
1181                                         $strong = $token;
1182                                 }
1183                         } else {
1184                                 # Here $token_len == 1
1185                                 if ($em) {
1186                                         if (strlen($token_stack[0]) == 1) {
1187                                                 # Closing emphasis marker:
1188                                                 array_shift($token_stack);
1189                                                 $span = array_shift($text_stack);
1190                                                 $span = $this->runSpanGamut($span);
1191                                                 $span = "<em>$span</em>";
1192                                                 $text_stack[0] .= $this->hashPart($span);
1193                                                 $em = '';
1194                                         } else {
1195                                                 $text_stack[0] .= $token;
1196                                         }
1197                                 } else {
1198                                         array_unshift($token_stack, $token);
1199                                         array_unshift($text_stack, '');
1200                                         $em = $token;
1201                                 }
1202                         }
1203                 }
1204                 return $text_stack[0];
1205         }
1208         protected function doBlockQuotes($text) {
1209                 $text = preg_replace_callback('/
1210                           (                                                             # Wrap whole match in $1
1211                                 (?>
1212                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1213                                         .+\n                                    # rest of the first line
1214                                   (.+\n)*                                       # subsequent consecutive lines
1215                                   \n*                                           # blanks
1216                                 )+
1217                           )
1218                         /xm',
1219                         array($this, '_doBlockQuotes_callback'), $text);
1221                 return $text;
1222         }
1223         protected function _doBlockQuotes_callback($matches) {
1224                 $bq = $matches[1];
1225                 # trim one level of quoting - trim whitespace-only lines
1226                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1227                 $bq = $this->runBlockGamut($bq);                # recurse
1229                 $bq = preg_replace('/^/m', "  ", $bq);
1230                 # These leading spaces cause problem with <pre> content, 
1231                 # so we need to fix that:
1232                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1233                         array($this, '_doBlockQuotes_callback2'), $bq);
1235                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1236         }
1237         protected function _doBlockQuotes_callback2($matches) {
1238                 $pre = $matches[1];
1239                 $pre = preg_replace('/^  /m', '', $pre);
1240                 return $pre;
1241         }
1244         protected function formParagraphs($text) {
1245         #
1246         #       Params:
1247         #               $text - string to process with html <p> tags
1248         #
1249                 # Strip leading and trailing lines:
1250                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1252                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1254                 #
1255                 # Wrap <p> tags and unhashify HTML blocks
1256                 #
1257                 foreach ($grafs as $key => $value) {
1258                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1259                                 # Is a paragraph.
1260                                 $value = $this->runSpanGamut($value);
1261                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1262                                 $value .= "</p>";
1263                                 $grafs[$key] = $this->unhash($value);
1264                         }
1265                         else {
1266                                 # Is a block.
1267                                 # Modify elements of @grafs in-place...
1268                                 $graf = $value;
1269                                 $block = $this->html_hashes[$graf];
1270                                 $graf = $block;
1271 //                              if (preg_match('{
1272 //                                      \A
1273 //                                      (                                                       # $1 = <div> tag
1274 //                                        <div  \s+
1275 //                                        [^>]*
1276 //                                        \b
1277 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1278 //                                        1
1279 //                                        \2
1280 //                                        [^>]*
1281 //                                        >
1282 //                                      )
1283 //                                      (                                                       # $3 = contents
1284 //                                      .*
1285 //                                      )
1286 //                                      (</div>)                                        # $4 = closing tag
1287 //                                      \z
1288 //                                      }xs', $block, $matches))
1289 //                              {
1290 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1291 //
1292 //                                      # We can't call Markdown(), because that resets the hash;
1293 //                                      # that initialization code should be pulled into its own sub, though.
1294 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1295 //                                      
1296 //                                      # Run document gamut methods on the content.
1297 //                                      foreach ($this->document_gamut as $method => $priority) {
1298 //                                              $div_content = $this->$method($div_content);
1299 //                                      }
1300 //
1301 //                                      $div_open = preg_replace(
1302 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1303 //
1304 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1305 //                              }
1306                                 $grafs[$key] = $graf;
1307                         }
1308                 }
1310                 return implode("\n\n", $grafs);
1311         }
1314         protected function encodeAttribute($text) {
1315         #
1316         # Encode text for a double-quoted HTML attribute. This function
1317         # is *not* suitable for attributes enclosed in single quotes.
1318         #
1319                 $text = $this->encodeAmpsAndAngles($text);
1320                 $text = str_replace('"', '&quot;', $text);
1321                 return $text;
1322         }
1325         protected function encodeURLAttribute($url, &$text = null) {
1326         #
1327         # Encode text for a double-quoted HTML attribute containing a URL,
1328         # applying the URL filter if set. Also generates the textual
1329         # representation for the URL (removing mailto: or tel:) storing it in $text.
1330         # This function is *not* suitable for attributes enclosed in single quotes.
1331         #
1332                 if ($this->url_filter_func)
1333                         $url = call_user_func($this->url_filter_func, $url);
1335                 if (preg_match('{^mailto:}i', $url))
1336                         $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
1337                 else if (preg_match('{^tel:}i', $url))
1338                 {
1339                         $url = $this->encodeAttribute($url);
1340                         $text = substr($url, 4);
1341                 }
1342                 else
1343                 {
1344                         $url = $this->encodeAttribute($url);
1345                         $text = $url;
1346                 }
1348                 return $url;
1349         }
1350         
1351         
1352         protected function encodeAmpsAndAngles($text) {
1353         #
1354         # Smart processing for ampersands and angle brackets that need to 
1355         # be encoded. Valid character entities are left alone unless the
1356         # no-entities mode is set.
1357         #
1358                 if ($this->no_entities) {
1359                         $text = str_replace('&', '&amp;', $text);
1360                 } else {
1361                         # Ampersand-encoding based entirely on Nat Irons's Amputator
1362                         # MT plugin: <http://bumppo.net/projects/amputator/>
1363                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
1364                                                                 '&amp;', $text);
1365                 }
1366                 # Encode remaining <'s
1367                 $text = str_replace('<', '&lt;', $text);
1369                 return $text;
1370         }
1373         protected function doAutoLinks($text) {
1374                 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1375                         array($this, '_doAutoLinks_url_callback'), $text);
1377                 # Email addresses: <address@domain.foo>
1378                 $text = preg_replace_callback('{
1379                         <
1380                         (?:mailto:)?
1381                         (
1382                                 (?:
1383                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1384                                 |
1385                                         ".*?"
1386                                 )
1387                                 \@
1388                                 (?:
1389                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1390                                 |
1391                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
1392                                 )
1393                         )
1394                         >
1395                         }xi',
1396                         array($this, '_doAutoLinks_email_callback'), $text);
1398                 return $text;
1399         }
1400         protected function _doAutoLinks_url_callback($matches) {
1401                 $url = $this->encodeURLAttribute($matches[1], $text);
1402                 $link = "<a href=\"$url\">$text</a>";
1403                 return $this->hashPart($link);
1404         }
1405         protected function _doAutoLinks_email_callback($matches) {
1406                 $addr = $matches[1];
1407                 $url = $this->encodeURLAttribute("mailto:$addr", $text);
1408                 $link = "<a href=\"$url\">$text</a>";
1409                 return $this->hashPart($link);
1410         }
1413         protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
1414         #
1415         #       Input: some text to obfuscate, e.g. "mailto:foo@example.com"
1416         #
1417         #       Output: the same text but with most characters encoded as either a
1418         #               decimal or hex entity, in the hopes of foiling most address
1419         #               harvesting spam bots. E.g.:
1420         #
1421         #        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1422         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1423         #        &#x6d;
1424         #
1425         #       Note: the additional output $tail is assigned the same value as the
1426         #       ouput, minus the number of characters specified by $head_length.
1427         #
1428         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1429         #   With some optimizations by Milian Wolff. Forced encoding of HTML
1430         #       attribute special characters by Allan Odgaard.
1431         #
1432                 if ($text == "") return $tail = "";
1434                 $chars = preg_split('/(?<!^)(?!$)/', $text);
1435                 $seed = (int)abs(crc32($text) / strlen($text)); # Deterministic seed.
1437                 foreach ($chars as $key => $char) {
1438                         $ord = ord($char);
1439                         # Ignore non-ascii chars.
1440                         if ($ord < 128) {
1441                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1442                                 # roughly 10% raw, 45% hex, 45% dec
1443                                 # '@' *must* be encoded. I insist.
1444                                 # '"' and '>' have to be encoded inside the attribute
1445                                 if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */;
1446                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1447                                 else              $chars[$key] = '&#'.$ord.';';
1448                         }
1449                 }
1451                 $text = implode('', $chars);
1452                 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
1454                 return $text;
1455         }
1458         protected function parseSpan($str) {
1459         #
1460         # Take the string $str and parse it into tokens, hashing embeded HTML,
1461         # escaped characters and handling code spans.
1462         #
1463                 $output = '';
1464                 
1465                 $span_re = '{
1466                                 (
1467                                         \\\\'.$this->escape_chars_re.'
1468                                 |
1469                                         (?<![`\\\\])
1470                                         `+                                              # code span marker
1471                         '.( $this->no_markup ? '' : '
1472                                 |
1473                                         <!--    .*?     -->             # comment
1474                                 |
1475                                         <\?.*?\?> | <%.*?%>             # processing instruction
1476                                 |
1477                                         <[!$]?[-a-zA-Z0-9:_]+   # regular tags
1478                                         (?>
1479                                                 \s
1480                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1481                                         )?
1482                                         >
1483                                 |
1484                                         <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1485                                 |
1486                                         </[-a-zA-Z0-9:_]+\s*> # closing tag
1487                         ').'
1488                                 )
1489                                 }xs';
1491                 while (1) {
1492                         #
1493                         # Each loop iteration seach for either the next tag, the next 
1494                         # openning code span marker, or the next escaped character. 
1495                         # Each token is then passed to handleSpanToken.
1496                         #
1497                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1498                         
1499                         # Create token from text preceding tag.
1500                         if ($parts[0] != "") {
1501                                 $output .= $parts[0];
1502                         }
1503                         
1504                         # Check if we reach the end.
1505                         if (isset($parts[1])) {
1506                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1507                                 $str = $parts[2];
1508                         }
1509                         else {
1510                                 break;
1511                         }
1512                 }
1513                 
1514                 return $output;
1515         }
1516         
1517         
1518         protected function handleSpanToken($token, &$str) {
1519         #
1520         # Handle $token provided by parseSpan by determining its nature and 
1521         # returning the corresponding value that should replace it.
1522         #
1523                 switch ($token{0}) {
1524                         case "\\":
1525                                 return $this->hashPart("&#". ord($token{1}). ";");
1526                         case "`":
1527                                 # Search for end marker in remaining text.
1528                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
1529                                         $str, $matches))
1530                                 {
1531                                         $str = $matches[2];
1532                                         $codespan = $this->makeCodeSpan($matches[1]);
1533                                         return $this->hashPart($codespan);
1534                                 }
1535                                 return $token; // return as text since no ending marker found.
1536                         default:
1537                                 return $this->hashPart($token);
1538                 }
1539         }
1542         protected function outdent($text) {
1543         #
1544         # Remove one level of line-leading tabs or spaces
1545         #
1546                 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1547         }
        # String length function used by detab to measure the line built so
        # far. `_initDetab` will create a fallback function to handle UTF-8
        # if the default function does not exist.
        protected $utf8_strlen = 'mb_strlen';
1553         
1554         protected function detab($text) {
1555         #
1556         # Replace tabs with the appropriate amount of space.
1557         #
1558                 # For each line we separate the line in blocks delemited by
1559                 # tab characters. Then we reconstruct every line by adding the 
1560                 # appropriate number of space between each blocks.
1561                 
1562                 $text = preg_replace_callback('/^.*\t.*$/m',
1563                         array($this, '_detab_callback'), $text);
1565                 return $text;
1566         }
1567         protected function _detab_callback($matches) {
1568                 $line = $matches[0];
1569                 $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1570                 
1571                 # Split in blocks.
1572                 $blocks = explode("\t", $line);
1573                 # Add each blocks to the line.
1574                 $line = $blocks[0];
1575                 unset($blocks[0]); # Do not add first block twice.
1576                 foreach ($blocks as $block) {
1577                         # Calculate amount of space, insert spaces, insert block.
1578                         $amount = $this->tab_width - 
1579                                 $strlen($line, 'UTF-8') % $this->tab_width;
1580                         $line .= str_repeat(" ", $amount) . $block;
1581                 }
1582                 return $line;
1583         }
1584         protected function _initDetab() {
1585         #
1586         # Check for the availability of the function in the `utf8_strlen` property
1587         # (initially `mb_strlen`). If the function is not available, create a 
1588         # function that will loosely count the number of UTF-8 characters with a
1589         # regular expression.
1590         #
1591                 if (function_exists($this->utf8_strlen)) return;
1592                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1593                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
1594                         $text, $m);');
1595         }
1598         protected function unhash($text) {
1599         #
1600         # Swap back in all the tags hashed by _HashHTMLBlocks.
1601         #
1602                 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
1603                         array($this, '_unhash_callback'), $text);
1604         }
1605         protected function _unhash_callback($matches) {
1606                 return $this->html_hashes[$matches[0]];
1607         }