Merge branch 'MDL-67377-master' of git://github.com/ferranrecio/moodle
[moodle.git] / lib / mustache / src / Mustache / Tokenizer.php
CommitLineData
<?php

/*
 * This file is part of Mustache.php.
 *
 * (c) 2010-2017 Justin Hileman
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

/**
 * Mustache Tokenizer class.
 *
 * This class is responsible for turning raw template source into a set of Mustache tokens.
 *
 * The scanner is a small finite state machine that walks the template one byte
 * at a time: plain text is buffered and flushed as text tokens, and everything
 * between the current opening and closing delimiters becomes a tag token.
 */
class Mustache_Tokenizer
{
    // Finite state machine states
    const IN_TEXT = 0;
    const IN_TAG_TYPE = 1;
    const IN_TAG = 2;

    // Token types
    const T_SECTION = '#';
    const T_INVERTED = '^';
    const T_END_SECTION = '/';
    const T_COMMENT = '!';
    const T_PARTIAL = '>';
    const T_PARENT = '<';
    const T_DELIM_CHANGE = '=';
    const T_ESCAPED = '_v';
    const T_UNESCAPED = '{';
    const T_UNESCAPED_2 = '&';
    const T_TEXT = '_t';
    const T_PRAGMA = '%';
    const T_BLOCK_VAR = '$';
    const T_BLOCK_ARG = '$arg';

    // Valid token types, keyed by the character that follows the opening delimiter.
    private static $tagTypes = array(
        self::T_SECTION => true,
        self::T_INVERTED => true,
        self::T_END_SECTION => true,
        self::T_COMMENT => true,
        self::T_PARTIAL => true,
        self::T_PARENT => true,
        self::T_DELIM_CHANGE => true,
        self::T_ESCAPED => true,
        self::T_UNESCAPED => true,
        self::T_UNESCAPED_2 => true,
        self::T_PRAGMA => true,
        self::T_BLOCK_VAR => true,
    );

    // Token properties
    const TYPE = 'type';
    const NAME = 'name';
    const OTAG = 'otag';
    const CTAG = 'ctag';
    const LINE = 'line';
    const INDEX = 'index';
    const END = 'end';
    const INDENT = 'indent';
    const NODES = 'nodes';
    const VALUE = 'value';
    const FILTERS = 'filters';

    private $state;
    private $tagType;
    private $buffer;
    private $tokens;
    private $seenTag;
    private $line;

    private $otag;
    private $otagChar;
    private $otagLen;

    private $ctag;
    private $ctagChar;
    private $ctagLen;

    /**
     * Scan and tokenize template source.
     *
     * @throws Mustache_Exception_SyntaxException          when mismatched or unclosed tags are encountered
     * @throws Mustache_Exception_InvalidArgumentException when $delimiters string is invalid
     *
     * @param string $text       Mustache template source to tokenize
     * @param string $delimiters Optionally, pass initial opening and closing delimiters (default: null)
     *
     * @return array Set of Mustache tokens
     */
    public function scan($text, $delimiters = null)
    {
        // Setting mbstring.func_overload makes things *really* slow.
        // Let's do everyone a favor and scan this string as ASCII instead.
        //
        // @codeCoverageIgnoreStart
        $encoding = null;
        if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
            $encoding = mb_internal_encoding();
            mb_internal_encoding('ASCII');
        }
        // @codeCoverageIgnoreEnd

        $this->reset();

        try {
            // Passing null to trim() is deprecated as of PHP 8.1, so treat the
            // default value as "no custom delimiters" explicitly.
            $delimiters = ($delimiters === null) ? '' : trim($delimiters);
            if ($delimiters) {
                $this->setDelimiters($delimiters);
            }

            $this->tokenize($text);
        } catch (Exception $e) {
            // Restore the user's encoding even when scanning fails.
            // @codeCoverageIgnoreStart
            if ($encoding) {
                mb_internal_encoding($encoding);
            }
            // @codeCoverageIgnoreEnd

            throw $e;
        }

        // Restore the user's encoding...
        // @codeCoverageIgnoreStart
        if ($encoding) {
            mb_internal_encoding($encoding);
        }
        // @codeCoverageIgnoreEnd

        return $this->tokens;
    }

    /**
     * Run the state machine over the template, appending tokens to `$this->tokens`.
     *
     * @throws Mustache_Exception_SyntaxException when mismatched or unclosed tags are encountered
     *
     * @param string $text Mustache template source to tokenize
     */
    private function tokenize($text)
    {
        $len = strlen($text);
        for ($i = 0; $i < $len; $i++) {
            switch ($this->state) {
                case self::IN_TEXT:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->otagChar && substr($text, $i, $this->otagLen) === $this->otag) {
                        // Step back one position so the next iteration (in the
                        // IN_TAG_TYPE state) starts on the opening delimiter again.
                        $i--;
                        $this->flushBuffer();
                        $this->state = self::IN_TAG_TYPE;
                    } else {
                        $this->buffer .= $char;
                        if ($char === "\n") {
                            // Text tokens never span more than one line.
                            $this->flushBuffer();
                            $this->line++;
                        }
                    }
                    break;

                case self::IN_TAG_TYPE:
                    // Move to the last character of the opening delimiter.
                    $i += $this->otagLen - 1;
                    // The template may end right after the opening delimiter, so
                    // guard the look-ahead instead of reading past the string.
                    $char = ($i + 1 < $len) ? $text[$i + 1] : '';
                    if (isset(self::$tagTypes[$char])) {
                        $tag = $char;
                        $this->tagType = $tag;
                    } else {
                        // No tag-type character: a plain (escaped) variable tag.
                        $tag = null;
                        $this->tagType = self::T_ESCAPED;
                    }

                    if ($this->tagType === self::T_DELIM_CHANGE) {
                        $i = $this->changeDelimiters($text, $i);
                        $this->state = self::IN_TEXT;
                    } elseif ($this->tagType === self::T_PRAGMA) {
                        $i = $this->addPragma($text, $i);
                        $this->state = self::IN_TEXT;
                    } else {
                        if ($tag !== null) {
                            // Consume the tag-type character.
                            $i++;
                        }
                        $this->state = self::IN_TAG;
                    }
                    $this->seenTag = $i;
                    break;

                default:
                    $char = $text[$i];
                    // Test whether it's time to change tags.
                    if ($char === $this->ctagChar && substr($text, $i, $this->ctagLen) === $this->ctag) {
                        $token = array(
                            self::TYPE => $this->tagType,
                            self::NAME => trim($this->buffer),
                            self::OTAG => $this->otag,
                            self::CTAG => $this->ctag,
                            self::LINE => $this->line,
                            self::INDEX => ($this->tagType === self::T_END_SECTION) ? $this->seenTag - $this->otagLen : $i + $this->ctagLen,
                        );

                        if ($this->tagType === self::T_UNESCAPED) {
                            // Clean up `{{{ tripleStache }}}` style tokens.
                            if ($this->ctag === '}}') {
                                // With default delimiters the extra `}` follows the closing tag.
                                $closed = ($i + 2 < $len) && $text[$i + 2] === '}';
                                if ($closed) {
                                    $i++;
                                }
                            } else {
                                // With custom delimiters the extra `}` ends up inside the tag name.
                                $lastName = $token[self::NAME];
                                $closed = substr($lastName, -1) === '}';
                                if ($closed) {
                                    $token[self::NAME] = trim(substr($lastName, 0, -1));
                                }
                            }

                            if (!$closed) {
                                $msg = sprintf(
                                    'Mismatched tag delimiters: %s on line %d',
                                    $token[self::NAME],
                                    $token[self::LINE]
                                );

                                throw new Mustache_Exception_SyntaxException($msg, $token);
                            }
                        }

                        $this->buffer = '';
                        // Skip the rest of the closing delimiter.
                        $i += $this->ctagLen - 1;
                        $this->state = self::IN_TEXT;
                        $this->tokens[] = $token;
                    } else {
                        $this->buffer .= $char;
                    }
                    break;
            }
        }

        // Anything other than IN_TEXT here means the template ended inside a tag.
        if ($this->state !== self::IN_TEXT) {
            $this->throwUnclosedTagException();
        }

        $this->flushBuffer();
    }

    /**
     * Helper function to reset tokenizer internal state.
     */
    private function reset()
    {
        $this->state = self::IN_TEXT;
        $this->tagType = null;
        $this->buffer = '';
        $this->tokens = array();
        $this->seenTag = false;
        $this->line = 0;

        $this->otag = '{{';
        $this->otagChar = '{';
        $this->otagLen = 2;

        $this->ctag = '}}';
        $this->ctagChar = '}';
        $this->ctagLen = 2;
    }

    /**
     * Flush the current buffer to a token.
     *
     * Does nothing when the buffer is empty.
     */
    private function flushBuffer()
    {
        if (strlen($this->buffer) > 0) {
            $this->tokens[] = array(
                self::TYPE => self::T_TEXT,
                self::LINE => $this->line,
                self::VALUE => $this->buffer,
            );
            $this->buffer = '';
        }
    }

    /**
     * Change the current Mustache delimiters. Set new `otag` and `ctag` values.
     *
     * @throws Mustache_Exception_SyntaxException when delimiter string is invalid or the tag is never closed
     *
     * @param string $text  Mustache template source
     * @param int    $index Current tokenizer index
     *
     * @return int New index value
     */
    private function changeDelimiters($text, $index)
    {
        $startIndex = strpos($text, '=', $index) + 1;
        $close = '=' . $this->ctag;
        $closeIndex = strpos($text, $close, $index);

        // strpos() returns false when the closing `=}}` is missing; using that
        // value in arithmetic would silently produce a bogus index.
        if ($closeIndex === false) {
            $this->throwUnclosedTagException();
        }

        $token = array(
            self::TYPE => self::T_DELIM_CHANGE,
            self::LINE => $this->line,
        );

        // For `{{=}}` the opening `=` is also the first character of the closing
        // sequence, which makes the length negative. Treat that as an empty
        // (and therefore invalid) delimiter string rather than letting substr()
        // pick up unrelated text that follows the tag.
        $length = $closeIndex - $startIndex;
        $delimiters = ($length > 0) ? trim(substr($text, $startIndex, $length)) : '';

        try {
            $this->setDelimiters($delimiters);
        } catch (Mustache_Exception_InvalidArgumentException $e) {
            throw new Mustache_Exception_SyntaxException($e->getMessage(), $token);
        }

        $this->tokens[] = $token;

        return $closeIndex + strlen($close) - 1;
    }

    /**
     * Set the current Mustache `otag` and `ctag` delimiters.
     *
     * @throws Mustache_Exception_InvalidArgumentException when delimiter string is invalid
     *
     * @param string $delimiters Opening and closing delimiters, separated by whitespace
     */
    private function setDelimiters($delimiters)
    {
        if (!preg_match('/^\s*(\S+)\s+(\S+)\s*$/', $delimiters, $matches)) {
            throw new Mustache_Exception_InvalidArgumentException(sprintf('Invalid delimiters: %s', $delimiters));
        }

        $otag = $matches[1];
        $ctag = $matches[2];

        $this->otag = $otag;
        $this->otagChar = $otag[0];
        $this->otagLen = strlen($otag);

        $this->ctag = $ctag;
        $this->ctagChar = $ctag[0];
        $this->ctagLen = strlen($ctag);
    }

    /**
     * Add pragma token.
     *
     * Pragmas are hoisted to the front of the template, so all pragma tokens
     * will appear at the front of the token list.
     *
     * @throws Mustache_Exception_SyntaxException when the pragma tag is never closed
     *
     * @param string $text
     * @param int    $index
     *
     * @return int New index value
     */
    private function addPragma($text, $index)
    {
        $end = strpos($text, $this->ctag, $index);

        // Without a closing delimiter there is no pragma name to extract.
        if ($end === false) {
            $this->throwUnclosedTagException();
        }

        // Skip the last delimiter character and the `%` marker.
        $pragma = trim(substr($text, $index + 2, $end - $index - 2));

        // Pragmas are hoisted to the front of the template.
        array_unshift($this->tokens, array(
            self::TYPE => self::T_PRAGMA,
            self::NAME => $pragma,
            self::LINE => 0,
        ));

        return $end + $this->ctagLen - 1;
    }

    /**
     * Throw a syntax exception describing the tag that is currently open.
     *
     * The tag name is whatever has been buffered since the opening delimiter.
     *
     * @throws Mustache_Exception_SyntaxException always
     */
    private function throwUnclosedTagException()
    {
        $name = trim($this->buffer);
        if ($name !== '') {
            $msg = sprintf('Unclosed tag: %s on line %d', $name, $this->line);
        } else {
            $msg = sprintf('Unclosed tag on line %d', $this->line);
        }

        throw new Mustache_Exception_SyntaxException($msg, array(
            self::TYPE => $this->tagType,
            self::NAME => $name,
            self::OTAG => $this->otag,
            self::CTAG => $this->ctag,
            self::LINE => $this->line,
        ));
    }
}