Commit | Line | Data |
---|---|---|
fbe18cc0 FM |
1 | <?php |
2 | ||
3 | namespace Sabberworm\CSS; | |
4 | ||
5 | use Sabberworm\CSS\CSSList\CSSList; | |
6 | use Sabberworm\CSS\CSSList\Document; | |
7 | use Sabberworm\CSS\CSSList\KeyFrame; | |
8 | use Sabberworm\CSS\Parsing\SourceException; | |
9 | use Sabberworm\CSS\Property\AtRule; | |
10 | use Sabberworm\CSS\Property\Import; | |
11 | use Sabberworm\CSS\Property\Charset; | |
12 | use Sabberworm\CSS\Property\CSSNamespace; | |
13 | use Sabberworm\CSS\RuleSet\AtRuleSet; | |
14 | use Sabberworm\CSS\CSSList\AtRuleBlockList; | |
15 | use Sabberworm\CSS\RuleSet\DeclarationBlock; | |
16 | use Sabberworm\CSS\Value\CSSFunction; | |
17 | use Sabberworm\CSS\Value\RuleValueList; | |
18 | use Sabberworm\CSS\Value\Size; | |
19 | use Sabberworm\CSS\Value\Color; | |
20 | use Sabberworm\CSS\Value\URL; | |
21 | use Sabberworm\CSS\Value\CSSString; | |
22 | use Sabberworm\CSS\Rule\Rule; | |
23 | use Sabberworm\CSS\Parsing\UnexpectedTokenException; | |
24 | use Sabberworm\CSS\Comment\Comment; | |
25 | ||
26 | /** | |
27 | * Parser class parses CSS from text into a data structure. | |
28 | */ | |
29 | class Parser { | |
30 | ||
31 | private $sText; | |
32 | private $aText; | |
33 | private $iCurrentPosition; | |
34 | private $oParserSettings; | |
35 | private $sCharset; | |
36 | private $iLength; | |
37 | private $blockRules; | |
38 | private $aSizeUnits; | |
39 | private $iLineNo; | |
40 | ||
/**
 * Parser constructor.
 * Note that $iLineNo starts from 1 and not 0.
 *
 * Besides storing its arguments, the constructor splits AtRule::BLOCK_RULES
 * into a list and builds a lookup table of every known size unit, grouped by
 * the unit's string length and sorted by that length in ascending order.
 *
 * @param string $sText the CSS source text
 * @param Settings|null $oParserSettings parser settings; Settings::create() is used when null
 * @param int $iLineNo line number to assign to the first line of $sText
 */
public function __construct($sText, Settings $oParserSettings = null, $iLineNo = 1) {
    $this->sText = $sText;
    $this->iCurrentPosition = 0;
    $this->iLineNo = $iLineNo;
    $this->oParserSettings = ($oParserSettings !== null) ? $oParserSettings : Settings::create();
    $this->blockRules = explode('/', AtRule::BLOCK_RULES);

    $sAllUnits = Size::ABSOLUTE_SIZE_UNITS.'/'.Size::RELATIVE_SIZE_UNITS.'/'.Size::NON_SIZE_UNITS;
    foreach (explode('/', $sAllUnits) as $sUnit) {
        $iUnitLength = strlen($sUnit);
        if (!isset($this->aSizeUnits[$iUnitLength])) {
            $this->aSizeUnits[$iUnitLength] = array();
        }
        // Keyed by the lower-cased unit so lookups can be case-insensitive.
        $this->aSizeUnits[$iUnitLength][strtolower($sUnit)] = $sUnit;
    }
    ksort($this->aSizeUnits, SORT_NUMERIC);
}
68 | ||
/**
 * Stores the charset and re-splits the source text into a per-character
 * array under that charset, updating the cached character count.
 *
 * @param string $sCharset
 */
public function setCharset($sCharset) {
    $this->sCharset = $sCharset;
    $aCharacters = $this->strsplit($this->sText);
    $this->aText = $aCharacters;
    $this->iLength = count($aCharacters);
}
74 | ||
/**
 * @return string|null the charset last passed to setCharset(), or null if none was set yet
 */
public function getCharset() {
    $sCurrentCharset = $this->sCharset;
    return $sCurrentCharset;
}
78 | ||
/**
 * Parses the source text into a Document.
 *
 * The charset is first reset to the default from the parser settings.
 *
 * @return Document
 */
public function parse() {
    $sDefaultCharset = $this->oParserSettings->sDefaultCharset;
    $this->setCharset($sDefaultCharset);
    $oDocument = new Document($this->iLineNo);
    $this->parseDocument($oDocument);
    return $oDocument;
}
85 | ||
/**
 * Fills the given document by parsing it as the root list.
 *
 * @param Document $oDocument
 */
private function parseDocument(Document $oDocument) {
    $bIsRoot = true;
    $this->parseList($oDocument, $bIsRoot);
}
89 | ||
/**
 * Parses list items into $oList until the list is closed or the input ends.
 *
 * A nested list ends when parseListItem() returns null (it has consumed the
 * closing "}"). The root list ends at the end of the input.
 *
 * @param CSSList $oList list receiving the parsed items
 * @param bool $bIsRoot whether $oList is the top-level document
 *
 * @throws SourceException if the input ends inside a non-root list
 */
private function parseList(CSSList $oList, $bIsRoot = false) {
    while (!$this->isEnd()) {
        $comments = $this->consumeWhiteSpace();
        if ($this->isEnd()) {
            // Only whitespace/comments were left. Trying to parse another item
            // here would run consume() past the end of the input and throw.
            break;
        }
        $oListItem = null;
        if ($this->oParserSettings->bLenientParsing) {
            try {
                $oListItem = $this->parseListItem($oList, $bIsRoot);
            } catch (UnexpectedTokenException $e) {
                // Lenient mode: drop the unparseable item and carry on.
                $oListItem = false;
            }
        } else {
            $oListItem = $this->parseListItem($oList, $bIsRoot);
        }
        if ($oListItem === null) {
            // List parsing finished
            return;
        }
        if ($oListItem) {
            $oListItem->setComments($comments);
            $oList->append($oListItem);
        }
    }
    if (!$bIsRoot) {
        throw new SourceException("Unexpected end of document", $this->iLineNo);
    }
}
116 | ||
/**
 * Parses a single list item: an at-rule, a declaration block, or the "}"
 * that closes the current list.
 *
 * @param CSSList $oList the list being filled (used to validate @charset placement)
 * @param bool $bIsRoot whether $oList is the top-level document
 *
 * @return mixed the parsed item, or null when a closing "}" ended a nested list
 *
 * @throws UnexpectedTokenException if @charset appears outside the root or not first
 * @throws SourceException if "}" is found in the root list
 */
private function parseListItem(CSSList $oList, $bIsRoot = false) {
    if (!$this->comes('@')) {
        if (!$this->comes('}')) {
            return $this->parseSelector();
        }
        $this->consume('}');
        if ($bIsRoot) {
            throw new SourceException("Unopened {", $this->iLineNo);
        }
        return null;
    }

    $oAtRule = $this->parseAtRule();
    if (!($oAtRule instanceof Charset)) {
        return $oAtRule;
    }
    if (!$bIsRoot) {
        throw new UnexpectedTokenException('@charset may only occur in root document', '', 'custom', $this->iLineNo);
    }
    if (count($oList->getContents()) > 0) {
        throw new UnexpectedTokenException('@charset must be the first parseable token in a document', '', 'custom', $this->iLineNo);
    }
    // Switch the parser to the declared charset for the rest of the input.
    $this->setCharset($oAtRule->getCharset()->getString());
    return $oAtRule;
}
141 | ||
/**
 * Parses an at-rule starting at "@".
 *
 * Handles @import, @charset, (vendor-prefixed) @keyframes and @namespace
 * explicitly. Any other at-rule becomes an AtRuleBlockList when its name
 * matches one of the configured block rules, and an AtRuleSet otherwise.
 *
 * @return Import|Charset|KeyFrame|CSSNamespace|AtRuleSet|AtRuleBlockList
 *
 * @throws UnexpectedTokenException on a malformed @namespace prefix or URL
 */
private function parseAtRule() {
    $this->consume('@');
    $sIdentifier = $this->parseIdentifier(false);
    $iIdentifierLineNum = $this->iLineNo;
    $this->consumeWhiteSpace();

    if ($sIdentifier === 'import') {
        $oLocation = $this->parseURLValue();
        $this->consumeWhiteSpace();
        // Everything up to ";" after the location is kept as the media query.
        $sMediaQuery = $this->comes(';') ? null : $this->consumeUntil(';');
        $this->consume(';');
        return new Import($oLocation, $sMediaQuery, $iIdentifierLineNum);
    }

    if ($sIdentifier === 'charset') {
        $sCharset = $this->parseStringValue();
        $this->consumeWhiteSpace();
        $this->consume(';');
        return new Charset($sCharset, $iIdentifierLineNum);
    }

    if ($this->identifierIs($sIdentifier, 'keyframes')) {
        $oKeyFrame = new KeyFrame($iIdentifierLineNum);
        $oKeyFrame->setVendorKeyFrame($sIdentifier);
        $oKeyFrame->setAnimationName(trim($this->consumeUntil('{', false, true)));
        $this->parseList($oKeyFrame);
        return $oKeyFrame;
    }

    if ($sIdentifier === 'namespace') {
        $mUrl = $this->parsePrimitiveValue();
        $sPrefix = null;
        if (!$this->comes(';')) {
            // Two values were given: the first is the prefix, the second the URL.
            $sPrefix = $mUrl;
            $mUrl = $this->parsePrimitiveValue();
        }
        $this->consume(';');
        if ($sPrefix !== null && !is_string($sPrefix)) {
            throw new UnexpectedTokenException('Wrong namespace prefix', $sPrefix, 'custom', $iIdentifierLineNum);
        }
        if (!($mUrl instanceof CSSString || $mUrl instanceof URL)) {
            throw new UnexpectedTokenException('Wrong namespace url of invalid type', $mUrl, 'custom', $iIdentifierLineNum);
        }
        return new CSSNamespace($mUrl, $sPrefix, $iIdentifierLineNum);
    }

    //Unknown other at rule (font-face or such)
    $sArgs = trim($this->consumeUntil('{', false, true));
    $bIsBlockList = false;
    foreach ($this->blockRules as $sBlockRuleName) {
        if ($this->identifierIs($sIdentifier, $sBlockRuleName)) {
            $bIsBlockList = true;
            break;
        }
    }
    if ($bIsBlockList) {
        $oAtRule = new AtRuleBlockList($sIdentifier, $sArgs, $iIdentifierLineNum);
        $this->parseList($oAtRule);
    } else {
        $oAtRule = new AtRuleSet($sIdentifier, $sArgs, $iIdentifierLineNum);
        $this->parseRuleSet($oAtRule);
    }
    return $oAtRule;
}
202 | ||
/**
 * Parses an identifier, optionally followed by a function call.
 *
 * @param bool $bAllowFunctions when true, a directly following "(" starts a
 *        function whose arguments are parsed and wrapped in a CSSFunction
 * @param bool $bIgnoreCase when true, the identifier is lower-cased
 *
 * @return string|CSSFunction
 *
 * @throws UnexpectedTokenException if no identifier character is found
 */
private function parseIdentifier($bAllowFunctions = true, $bIgnoreCase = true) {
    $sIdentifier = $this->parseCharacter(true);
    if ($sIdentifier === null) {
        throw new UnexpectedTokenException(null, $this->peek(5), 'identifier', $this->iLineNo);
    }
    for ($sNext = $this->parseCharacter(true); $sNext !== null; $sNext = $this->parseCharacter(true)) {
        $sIdentifier .= $sNext;
    }
    if ($bIgnoreCase) {
        $sIdentifier = $this->strtolower($sIdentifier);
    }
    if (!$bAllowFunctions || !$this->comes('(')) {
        return $sIdentifier;
    }
    $this->consume('(');
    $aArguments = $this->parseValue(array('=', ' ', ','));
    $oFunction = new CSSFunction($sIdentifier, $aArguments, ',', $this->iLineNo);
    $this->consume(')');
    return $oFunction;
}
223 | ||
/**
 * Parses a quoted or unquoted string into a CSSString.
 *
 * A string starting with ' or " runs to the matching quote. Otherwise it is
 * unquoted and runs up to the next whitespace, brace, bracket, parenthesis
 * or angle bracket.
 *
 * @return CSSString
 *
 * @throws SourceException if a quoted string yields no character before its closing quote
 */
private function parseStringValue() {
    $sFirst = $this->peek();
    $sQuote = ($sFirst === "'" || $sFirst === '"') ? $sFirst : null;
    $sResult = "";

    if ($sQuote === null) {
        //Unquoted strings end in whitespace or with braces, brackets, parentheses
        while (!preg_match('/[\\s{}()<>\\[\\]]/isu', $this->peek())) {
            $sResult .= $this->parseCharacter(false);
        }
        return new CSSString($sResult, $this->iLineNo);
    }

    $this->consume($sQuote);
    while (!$this->comes($sQuote)) {
        $sContent = $this->parseCharacter(false);
        if ($sContent === null) {
            throw new SourceException("Non-well-formed quoted string {$this->peek(3)}", $this->iLineNo);
        }
        $sResult .= $sContent;
    }
    $this->consume($sQuote);
    return new CSSString($sResult, $this->iLineNo);
}
254 | ||
/**
 * Consumes and returns the next logical character, resolving backslash escapes.
 *
 * Escape handling:
 *  - backslash + line break: both are dropped and '' is returned (line continuation);
 *  - backslash + non-hex character: that character is returned as is;
 *  - backslash + 1-6 hex digits: the code point is converted to the current
 *    charset; one whitespace character following a short escape is swallowed.
 *
 * @param bool $bIsForIdentifier when true, only identifier characters
 *        (a-z, A-Z, 0-9, "-", "_", and bytes above 0xa1) are accepted
 *
 * @return string|null the character ('' for a line continuation), or null if
 *         the next character cannot be part of an identifier
 */
private function parseCharacter($bIsForIdentifier) {
    if ($this->peek() === '\\') {
        // The single quotes are intentional here: the IE hacks are a literal backslash plus 0 or 9.
        if ($bIsForIdentifier && $this->oParserSettings->bLenientParsing && ($this->comes('\0') || $this->comes('\9'))) {
            // Non-strings can contain \0 or \9 which is an IE hack supported in lenient parsing.
            return null;
        }
        $this->consume('\\');
        // These must be double-quoted: in single quotes PHP keeps '\n' as a
        // backslash followed by "n", so a real line break would never match.
        if ($this->comes("\r\n")) {
            $this->consume(2);
            return '';
        }
        if ($this->comes("\n") || $this->comes("\r")) {
            $this->consume(1);
            return '';
        }
        if (preg_match('/[0-9a-fA-F]/Su', $this->peek()) === 0) {
            return $this->consume(1);
        }
        $sUnicode = $this->consumeExpression('/^[0-9a-fA-F]{1,6}/u');
        if ($this->strlen($sUnicode) < 6) {
            //Consume whitespace after incomplete unicode escape
            if (preg_match('/\\s/isSu', $this->peek())) {
                if ($this->comes("\r\n")) {
                    $this->consume(2);
                } else {
                    $this->consume(1);
                }
            }
        }
        // Encode the code point as 4 little-endian bytes (UTF-32LE), then convert.
        $iUnicode = intval($sUnicode, 16);
        $sUtf32 = "";
        for ($i = 0; $i < 4; ++$i) {
            $sUtf32 .= chr($iUnicode & 0xff);
            $iUnicode = $iUnicode >> 8;
        }
        return iconv('utf-32le', $this->sCharset, $sUtf32);
    }
    if ($bIsForIdentifier) {
        $peek = ord($this->peek());
        // Ranges: a-z A-Z 0-9 - _
        if (($peek >= 97 && $peek <= 122) ||
            ($peek >= 65 && $peek <= 90) ||
            ($peek >= 48 && $peek <= 57) ||
            ($peek === 45) ||
            ($peek === 95) ||
            ($peek > 0xa1)) {
            return $this->consume(1);
        }
    } else {
        return $this->consume(1);
    }
    return null;
}
303 | ||
/**
 * Parses a declaration block: the selector text up to "{" followed by its rules.
 *
 * Comments found while reading the selector are attached to the block.
 *
 * @return DeclarationBlock
 */
private function parseSelector() {
    $aSelectorComments = array();
    $oBlock = new DeclarationBlock($this->iLineNo);
    $sSelector = $this->consumeUntil('{', false, true, $aSelectorComments);
    $oBlock->setSelector($sSelector);
    $oBlock->setComments($aSelectorComments);
    $this->parseRuleSet($oBlock);
    return $oBlock;
}
312 | ||
/**
 * Parses rules into $oRuleSet until the closing "}" and consumes that brace.
 *
 * In lenient mode a rule that fails to parse is skipped: input is discarded
 * up to the next line break, ";" or "}". If even that fails, the rule set is
 * closed silently.
 *
 * @param mixed $oRuleSet object receiving the rules through addRule()
 */
private function parseRuleSet($oRuleSet) {
    while ($this->comes(';')) {
        $this->consume(';');
    }
    while (!$this->comes('}')) {
        if (!$this->oParserSettings->bLenientParsing) {
            $oRule = $this->parseRule();
        } else {
            $oRule = null;
            try {
                $oRule = $this->parseRule();
            } catch (UnexpectedTokenException $oParseError) {
                try {
                    $sSkipped = $this->consumeUntil(array("\n", ";", '}'), true);
                    // We need to “unfind” the matches to the end of the ruleSet as this will be matched later
                    if ($this->streql(substr($sSkipped, -1), '}')) {
                        --$this->iCurrentPosition;
                    } else {
                        while ($this->comes(';')) {
                            $this->consume(';');
                        }
                    }
                } catch (UnexpectedTokenException $oRecoveryError) {
                    // We’ve reached the end of the document. Just close the RuleSet.
                    return;
                }
            }
        }
        if ($oRule) {
            $oRuleSet->addRule($oRule);
        }
    }
    $this->consume('}');
}
347 | ||
/**
 * Parses one "property: value" rule, including IE hacks (lenient mode),
 * "!important" and any trailing semicolons.
 *
 * @return Rule|null the parsed rule, or null when only whitespace/comments
 *         preceded the "}" that closes the surrounding rule set
 *
 * @throws UnexpectedTokenException if the input is not a well-formed rule
 */
private function parseRule() {
    $aComments = $this->consumeWhiteSpace();
    if ($this->comes('}')) {
        // Nothing but whitespace/comments before the closing brace, as in
        // "a { color: red; }". Parsing an identifier here would throw on valid
        // CSS. The "}" is left in place for the caller to consume.
        return null;
    }
    $oRule = new Rule($this->parseIdentifier(), $this->iLineNo);
    $oRule->setComments($aComments);
    $oRule->addComments($this->consumeWhiteSpace());
    $this->consume(':');
    $oValue = $this->parseValue(self::listDelimiterForRule($oRule->getRule()));
    $oRule->setValue($oValue);
    if ($this->oParserSettings->bLenientParsing) {
        // A backslash followed by one character after the value is recorded as an IE hack.
        while ($this->comes('\\')) {
            $this->consume('\\');
            $oRule->addIeHack($this->consume());
            $this->consumeWhiteSpace();
        }
    }
    if ($this->comes('!')) {
        $this->consume('!');
        $this->consumeWhiteSpace();
        $this->consume('important');
        $oRule->setIsImportant(true);
    }
    while ($this->comes(';')) {
        $this->consume(';');
    }
    return $oRule;
}
374 | ||
/**
 * Parses a (possibly composite) value up to "}", ";", "!", ")" or a backslash.
 *
 * Values and the delimiters between them are first collected into a flat
 * token list (value, delimiter, value, ...), using ' ' when no explicit
 * delimiter was found. Runs of values joined by the same delimiter are then
 * folded into RuleValueList objects, one delimiter at a time in the order
 * given by $aListDelimiters.
 *
 * @param array $aListDelimiters delimiters in folding order
 *
 * @return mixed a single primitive value or a RuleValueList
 */
private function parseValue($aListDelimiters) {
    $aTokens = array();
    $this->consumeWhiteSpace();
    //Build a list of delimiters and parsed values
    while (true) {
        if ($this->comes('}') || $this->comes(';') || $this->comes('!') || $this->comes(')') || $this->comes('\\')) {
            break;
        }
        if (count($aTokens) > 0) {
            //Whitespace is the list delimiter unless an explicit one follows
            $sFoundDelimiter = ' ';
            foreach ($aListDelimiters as $sDelimiter) {
                if ($this->comes($sDelimiter)) {
                    $sFoundDelimiter = $this->consume($sDelimiter);
                    $this->consumeWhiteSpace();
                    break;
                }
            }
            $aTokens[] = $sFoundDelimiter;
        }
        $aTokens[] = $this->parsePrimitiveValue();
        $this->consumeWhiteSpace();
    }
    //Convert the list to list objects
    foreach ($aListDelimiters as $sDelimiter) {
        if (count($aTokens) === 1) {
            return $aTokens[0];
        }
        while (($iDelimiterIndex = array_search($sDelimiter, $aTokens, true)) !== false) {
            //Count the values joined by consecutive occurrences of this delimiter
            $iComponentCount = 2;
            for ($i = $iDelimiterIndex + 2; $i < count($aTokens); $i += 2, ++$iComponentCount) {
                if ($sDelimiter !== $aTokens[$i]) {
                    break;
                }
            }
            $oList = new RuleValueList($sDelimiter, $this->iLineNo);
            //Values sit on every second slot, starting right before the delimiter
            for ($k = 0; $k < $iComponentCount; ++$k) {
                $oList->addListComponent($aTokens[$iDelimiterIndex - 1 + 2 * $k]);
            }
            array_splice($aTokens, $iDelimiterIndex - 1, $iComponentCount * 2 - 1, array($oList));
        }
    }
    return $aTokens[0];
}
420 | ||
/**
 * Returns the value-list delimiters for a rule, in folding order.
 *
 * For "font" and "font-*" rules "/" comes before " ". For every other rule
 * " " comes before "/".
 *
 * @param string $sRule the rule (property) name
 *
 * @return array
 */
private static function listDelimiterForRule($sRule) {
    $bIsFontRule = (bool) preg_match('/^font($|-)/', $sRule);
    return $bIsFontRule
        ? array(',', '/', ' ')
        : array(',', ' ', '/');
}
427 | ||
/**
 * Parses a single primitive value, chosen by looking at the upcoming input:
 * number, color, URL, quoted string, Microsoft "progid:" filter (lenient
 * mode only), or an identifier/function as the fallback.
 *
 * Surrounding whitespace and comments are consumed.
 *
 * @return mixed the parsed value object, or a string for a plain identifier
 */
private function parsePrimitiveValue() {
    $this->consumeWhiteSpace();

    // A number starts with a digit, "-" or "." plus a digit, or "-." plus a digit.
    $bStartsNumber = is_numeric($this->peek())
        || ($this->comes('-.') && is_numeric($this->peek(1, 2)))
        || (($this->comes('-') || $this->comes('.')) && is_numeric($this->peek(1, 1)));

    if ($bStartsNumber) {
        $oValue = $this->parseNumericValue();
    } elseif ($this->comes('#') || $this->comes('rgb', true) || $this->comes('hsl', true)) {
        $oValue = $this->parseColorValue();
    } elseif ($this->comes('url', true)) {
        $oValue = $this->parseURLValue();
    } elseif ($this->comes("'") || $this->comes('"')) {
        $oValue = $this->parseStringValue();
    } elseif ($this->comes("progid:") && $this->oParserSettings->bLenientParsing) {
        $oValue = $this->parseMicrosoftFilter();
    } else {
        $oValue = $this->parseIdentifier(true, false);
    }

    $this->consumeWhiteSpace();
    return $oValue;
}
447 | ||
/**
 * Parses a number with an optional leading "-" and an optional unit.
 *
 * The unit table is keyed by unit length in ascending order, so shorter
 * units are tried first. Matching is case-insensitive, and the unit is
 * stored with the spelling from the table, not from the input.
 *
 * @param bool $bForColor passed through to the Size constructor
 *
 * @return Size
 */
private function parseNumericValue($bForColor = false) {
    $sSize = '';
    if ($this->comes('-')) {
        $sSize .= $this->consume('-');
    }
    while (is_numeric($this->peek()) || $this->comes('.')) {
        $sSize .= $this->consume(1);
    }

    $sUnit = null;
    // Iterate by value: nothing is modified, and a by-reference loop variable
    // would leave the entries of $this->aSizeUnits flagged as references.
    foreach ($this->aSizeUnits as $iLength => $aValues) {
        $sKey = strtolower($this->peek($iLength));
        // Table values are always unit strings, so isset() is a sufficient check.
        if (isset($aValues[$sKey])) {
            $sUnit = $aValues[$sKey];
            $this->consume($iLength);
            break;
        }
    }
    return new Size(floatval($sSize), $sUnit, $bForColor, $this->iLineNo);
}
473 | ||
/**
 * Parses a color given either as "#rgb" / "#rrggbb" or in functional
 * notation such as rgb(...) or hsl(...).
 *
 * In functional notation each letter of the function name becomes the key of
 * one comma-separated numeric component.
 *
 * @return Color
 */
private function parseColorValue() {
    $aColor = array();
    if (!$this->comes('#')) {
        $sColorMode = $this->parseIdentifier(false);
        $this->consumeWhiteSpace();
        $this->consume('(');
        $iComponentCount = $this->strlen($sColorMode);
        $iLastIndex = $iComponentCount - 1;
        for ($i = 0; $i < $iComponentCount; ++$i) {
            $this->consumeWhiteSpace();
            $aColor[$sColorMode[$i]] = $this->parseNumericValue(true);
            $this->consumeWhiteSpace();
            if ($i < $iLastIndex) {
                $this->consume(',');
            }
        }
        $this->consume(')');
        return new Color($aColor, $this->iLineNo);
    }

    $this->consume('#');
    $sHex = $this->parseIdentifier(false);
    if ($this->strlen($sHex) === 3) {
        // Expand the short form by doubling every digit: "abc" becomes "aabbcc".
        $sHex = $sHex[0] . $sHex[0] . $sHex[1] . $sHex[1] . $sHex[2] . $sHex[2];
    }
    foreach (array('r', 'g', 'b') as $iChannel => $sChannel) {
        $sPair = $sHex[2 * $iChannel] . $sHex[2 * $iChannel + 1];
        $aColor[$sChannel] = new Size(intval($sPair, 16), null, true, $this->iLineNo);
    }
    return new Color($aColor, $this->iLineNo);
}
500 | ||
/**
 * Parses a Microsoft filter expression: the text up to "(" becomes the
 * function name, and the rest is parsed as its argument list.
 *
 * @return CSSFunction
 */
private function parseMicrosoftFilter() {
    $sFilterName = $this->consumeUntil('(', false, true);
    $aFilterArguments = $this->parseValue(array(',', '='));
    $oFilter = new CSSFunction($sFilterName, $aFilterArguments, ',', $this->iLineNo);
    return $oFilter;
}
506 | ||
/**
 * Parses a URL written either as url(...) or as a bare string.
 *
 * @return URL
 */
private function parseURLValue() {
    $bHasUrlWrapper = $this->comes('url', true);
    if ($bHasUrlWrapper) {
        $this->consume('url');
        $this->consumeWhiteSpace();
        $this->consume('(');
    }
    $this->consumeWhiteSpace();
    $oLocation = $this->parseStringValue();
    $oUrl = new URL($oLocation, $this->iLineNo);
    if (!$bHasUrlWrapper) {
        return $oUrl;
    }
    $this->consumeWhiteSpace();
    $this->consume(')');
    return $oUrl;
}
522 | ||
/**
 * Tests an identifier for a given value. Since identifiers are all keywords,
 * they can be vendor-prefixed ("-webkit-keyframes"). We need to check for
 * these versions too.
 *
 * The comparison is case-insensitive. $sMatch is treated as literal text,
 * never as a regular expression.
 *
 * @param string $sIdentifier the identifier found in the input
 * @param string $sMatch the keyword to test for
 *
 * @return bool
 */
private function identifierIs($sIdentifier, $sMatch) {
    if (strcasecmp($sIdentifier, $sMatch) === 0) {
        return true;
    }
    // Quote the keyword so characters such as "." or "+" cannot alter the pattern.
    $sPattern = '/^(-\\w+-)?' . preg_quote($sMatch, '/') . '$/i';
    return preg_match($sPattern, $sIdentifier) === 1;
}
530 | ||
/**
 * Tells whether the upcoming input equals $sString, without consuming it.
 *
 * @param string $sString the text to look for
 * @param bool $bCaseInsensitive whether to compare case-insensitively
 *
 * @return bool false when nothing is left to read
 */
private function comes($sString, $bCaseInsensitive = false) {
    $sUpcoming = $this->peek(strlen($sString));
    if ($sUpcoming == '') {
        return false;
    }
    return $this->streql($sUpcoming, $sString, $bCaseInsensitive);
}
537 | ||
/**
 * Returns up to $iLength characters starting $iOffset characters after the
 * current position, without consuming them.
 *
 * @param int $iLength number of characters to read
 * @param int $iOffset distance from the current position
 *
 * @return string '' when the start lies at or beyond the end of the input
 */
private function peek($iLength = 1, $iOffset = 0) {
    $iStart = $this->iCurrentPosition + $iOffset;
    return ($iStart >= $this->iLength)
        ? ''
        : $this->substr($iStart, $iLength);
}
545 | ||
/**
 * Consumes input and advances the current position and line counter.
 *
 * @param string|int $mValue a string that must come next in the input
 *        (compared through streql() with its default flags), or the number
 *        of characters to consume
 *
 * @return string the consumed text; for a string argument this is $mValue itself
 *
 * @throws UnexpectedTokenException if the expected string does not come next,
 *         or fewer than $mValue characters are left
 */
private function consume($mValue = 1) {
    if (is_string($mValue)) {
        $iLength = $this->strlen($mValue);
        if (!$this->streql($this->substr($this->iCurrentPosition, $iLength), $mValue)) {
            // The third argument is the match type and the fourth the line
            // number, as at every other call site in this class.
            throw new UnexpectedTokenException($mValue, $this->peek(max($iLength, 5)), 'literal', $this->iLineNo);
        }
        $this->iLineNo += substr_count($mValue, "\n");
        $this->iCurrentPosition += $iLength;
        return $mValue;
    }

    if ($this->iCurrentPosition + $mValue > $this->iLength) {
        throw new UnexpectedTokenException($mValue, $this->peek(5), 'count', $this->iLineNo);
    }
    $sResult = $this->substr($this->iCurrentPosition, $mValue);
    $this->iLineNo += substr_count($sResult, "\n");
    $this->iCurrentPosition += $mValue;
    return $sResult;
}
567 | ||
/**
 * Consumes the text matched by a regular expression applied to the input
 * returned by inputLeft().
 *
 * @param string $mExpression a PCRE pattern
 *
 * @return string the consumed match
 *
 * @throws UnexpectedTokenException if the pattern does not match
 */
private function consumeExpression($mExpression) {
    $aMatches = null;
    $iMatchCount = preg_match($mExpression, $this->inputLeft(), $aMatches);
    if ($iMatchCount !== 1) {
        throw new UnexpectedTokenException($mExpression, $this->peek(5), 'expression', $this->iLineNo);
    }
    return $this->consume($aMatches[0]);
}
575 | ||
/**
 * Consumes whitespace and comments and returns the comments found.
 *
 * In lenient mode an unterminated comment is treated as the end of the
 * document: the position jumps to the end of the input and the comments
 * collected so far are returned.
 *
 * @return array the comments encountered, in source order (never null)
 */
private function consumeWhiteSpace() {
    $comments = array();
    do {
        while (preg_match('/\\s/isSu', $this->peek()) === 1) {
            $this->consume(1);
        }
        if ($this->oParserSettings->bLenientParsing) {
            try {
                $oComment = $this->consumeComment();
            } catch (UnexpectedTokenException $e) {
                // When we can’t find the end of a comment, we assume the document is finished.
                $this->iCurrentPosition = $this->iLength;
                // Callers pass the result on to setComments()/addComments(),
                // so always return an array.
                return $comments;
            }
        } else {
            $oComment = $this->consumeComment();
        }
        if ($oComment !== false) {
            $comments[] = $oComment;
        }
    } while ($oComment !== false);
    return $comments;
}
599 | ||
/**
 * Consumes a comment if one starts at the current position.
 *
 * @return false|Comment false when the input does not continue with a
 *         comment opener; otherwise a Comment carrying the text between the
 *         delimiters and the line number on which the comment started
 */
private function consumeComment() {
    if (!$this->comes('/*')) {
        return false;
    }
    $iStartLine = $this->iLineNo;
    // Skip the "/"; the "*" is collected by the loop and stripped below.
    $this->consume(1);
    $sCollected = '';
    while (($sChar = $this->consume(1)) !== '') {
        $sCollected .= $sChar;
        if ($this->comes('*/')) {
            $this->consume(2);
            break;
        }
    }
    // We skip the * which was included in the comment.
    return new Comment(substr($sCollected, 1), $iStartLine);
}
625 | ||
/**
 * @return bool true once the current position has reached the end of the input
 */
private function isEnd() {
    $iRemaining = $this->iLength - $this->iCurrentPosition;
    return $iRemaining <= 0;
}
629 | ||
/**
 * Consumes input until one of the given end characters is reached and
 * returns the text consumed before it. Comments met on the way are consumed
 * and appended to $comments, not to the returned text.
 *
 * @param string|array $aEnd one end character or a list of them
 * @param bool $bIncludeEnd consume the end character and append it to the result
 * @param bool $consumeEnd consume the end character without returning it
 *        (only relevant when $bIncludeEnd is false)
 * @param array $comments receives the comments found while scanning
 *
 * @return string
 *
 * @throws UnexpectedTokenException if no end character is found
 */
private function consumeUntil($aEnd, $bIncludeEnd = false, $consumeEnd = false, array &$comments = array()) {
    $aTerminators = is_array($aEnd) ? $aEnd : array($aEnd);
    $sCollected = '';
    $iStartPosition = $this->iCurrentPosition;

    while (($sChar = $this->consume(1)) !== '') {
        if (!in_array($sChar, $aTerminators)) {
            $sCollected .= $sChar;
            $oComment = $this->consumeComment();
            if ($oComment) {
                $comments[] = $oComment;
            }
            continue;
        }
        if ($bIncludeEnd) {
            $sCollected .= $sChar;
        } elseif (!$consumeEnd) {
            // Step back so the end character stays available to the caller.
            $this->iCurrentPosition -= $this->strlen($sChar);
        }
        return $sCollected;
    }

    $this->iCurrentPosition = $iStartPosition;
    throw new UnexpectedTokenException('One of ("'.implode('","', $aTerminators).'")', $this->peek(5), 'search', $this->iLineNo);
}
653 | ||
/**
 * Returns all input from the current position to the end.
 *
 * The length is computed explicitly. A length of -1 would make substr() stop
 * one character before the end and silently drop the last character.
 *
 * @return string '' when the position is at or beyond the end of the input
 */
private function inputLeft() {
    return $this->substr($this->iCurrentPosition, $this->iLength - $this->iCurrentPosition);
}
657 | ||
/**
 * Returns a run of characters from the split source text.
 *
 * @param int $iStart index of the first character
 * @param int $iLength number of characters; a negative value is counted
 *        back from the end of the input
 *
 * @return string fewer characters than requested (possibly '') when the run
 *         would extend past the end of the input
 */
private function substr($iStart, $iLength) {
    if ($iLength < 0) {
        $iLength = $this->iLength - $iStart + $iLength;
    }
    if ($iStart + $iLength > $this->iLength) {
        $iLength = $this->iLength - $iStart;
    }
    if ($iLength <= 0) {
        return '';
    }
    return implode('', array_slice($this->aText, $iStart, $iLength));
}
673 | ||
/**
 * Returns the length of a string: in characters of the current charset when
 * multibyte support is enabled, otherwise in bytes.
 *
 * @param string $sString
 *
 * @return int
 */
private function strlen($sString) {
    if (!$this->oParserSettings->bMultibyteSupport) {
        return strlen($sString);
    }
    return mb_strlen($sString, $this->sCharset);
}
681 | ||
/**
 * Compares two strings for equality.
 *
 * @param string $sString1
 * @param string $sString2
 * @param bool $bCaseInsensitive when true (the default) both strings are
 *        lower-cased before they are compared
 *
 * @return bool
 */
private function streql($sString1, $sString2, $bCaseInsensitive = true) {
    if (!$bCaseInsensitive) {
        return $sString1 === $sString2;
    }
    $sLower1 = $this->strtolower($sString1);
    $sLower2 = $this->strtolower($sString2);
    return $sLower1 === $sLower2;
}
689 | ||
/**
 * Lower-cases a string, charset-aware when multibyte support is enabled.
 *
 * @param string $sString
 *
 * @return string
 */
private function strtolower($sString) {
    if (!$this->oParserSettings->bMultibyteSupport) {
        return strtolower($sString);
    }
    return mb_strtolower($sString, $this->sCharset);
}
697 | ||
/**
 * Splits a string into an array of single characters.
 *
 * With multibyte support the split honours the current charset. UTF-8 uses
 * one regex split and other charsets are split with mb_substr(). Without
 * multibyte support the string is split into bytes.
 *
 * @param string $sString
 *
 * @return array list of one-character strings; empty for an empty string
 */
private function strsplit($sString) {
    if (!$this->oParserSettings->bMultibyteSupport) {
        // str_split('') would not yield an empty list, so handle it here.
        return ($sString === '') ? array() : str_split($sString);
    }

    if ($this->streql($this->sCharset, 'utf-8')) {
        // -1 means "no limit"; passing null for the limit is deprecated as of PHP 8.1.
        $aResult = preg_split('//u', $sString, -1, PREG_SPLIT_NO_EMPTY);
        if ($aResult !== false) {
            return $aResult;
        }
        // preg_split() fails on invalid UTF-8. Fall through to the generic
        // splitter so the caller never receives false.
    }

    $iLength = mb_strlen($sString, $this->sCharset);
    $aResult = array();
    for ($i = 0; $i < $iLength; ++$i) {
        $aResult[] = mb_substr($sString, $i, 1, $this->sCharset);
    }
    return $aResult;
}
718 | ||
/**
 * Finds the first occurrence of $sNeedle in $sString at or after $iOffset,
 * charset-aware when multibyte support is enabled.
 *
 * @param string $sString
 * @param string $sNeedle
 * @param int $iOffset
 *
 * @return int|false the position of the match, or false if there is none
 */
private function strpos($sString, $sNeedle, $iOffset) {
    if (!$this->oParserSettings->bMultibyteSupport) {
        return strpos($sString, $sNeedle, $iOffset);
    }
    return mb_strpos($sString, $sNeedle, $iOffset, $this->sCharset);
}
726 | ||
727 | } |