vendor/twig/twig/src/Lexer.php line 449

Open in your IDE?
  1. <?php
  2. /*
  3. * This file is part of Twig.
  4. *
  5. * (c) Fabien Potencier
  6. * (c) Armin Ronacher
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Twig;
  12. use Twig\Error\SyntaxError;
  13. /**
  14. * @author Fabien Potencier <fabien@symfony.com>
  15. */
  16. class Lexer
  17. {
  // Set to true at the end of initialize(), so the regexes are only built once.
  18. private $isInitialized = false;
  // Tokens collected by pushToken() during the current tokenize() call.
  19. private $tokens;
  // Template code with "\r\n" and "\r" normalised to "\n" (see tokenize()).
  20. private $code;
  // Byte offset into $code of the next character to lex.
  21. private $cursor;
  // Current line number; starts at 1, advanced by moveCursor(), overwritten by a "line N" block.
  22. private $lineno;
  // strlen($code); lexing stops once $cursor reaches it.
  23. private $end;
  // Current lexer state (one of the STATE_* constants).
  24. private $state;
  // Stack of the states saved by pushState() and restored by popState().
  25. private $states;
  // Stack of [opener, line number] pairs for '(', '[', '{', '"' and the interpolation opener.
  26. private $brackets;
  // Environment; getOperatorRegex() reads its unary and binary operators.
  27. private $env;
  // Source currently being tokenized; passed to SyntaxError and TokenStream.
  28. private $source;
  // Delimiter and whitespace-control options (defaults merged with constructor overrides).
  29. private $options;
  // Regular expressions built by initialize(), keyed by purpose.
  30. private $regexes;
  // Index into $positions of the tag start consumed last; -1 before the first one.
  31. private $position;
  // preg_match_all() result (with offsets) for every tag start found in $code.
  32. private $positions;
  // Line on which the current block/variable tag was opened; reported by "Unclosed" errors.
  33. private $currentVarBlockLine;
  // Lexer states dispatched on by tokenize().
  34. public const STATE_DATA = 0;
  35. public const STATE_BLOCK = 1;
  36. public const STATE_VAR = 2;
  37. public const STATE_STRING = 3;
  38. public const STATE_INTERPOLATION = 4;
  // Name: a letter, underscore or byte 0x7f-0xff, followed by any of those or digits.
  39. public const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  // Complete string literal with backslash escapes: double-quoted without any '#', or single-quoted.
  40. public const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  // Number: integer with optional "_" separators, optional fraction, optional exponent (extended syntax, /x).
  41. public const REGEX_NUMBER = '/(?(DEFINE)
  42. (?<LNUM>[0-9]+(_[0-9]+)*) # Integers (with underscores) 123_456
  43. (?<FRAC>\.(?&LNUM)) # Fractional part .456
  44. (?<EXPONENT>[eE][+-]?(?&LNUM)) # Exponent part E+10
  45. (?<DNUM>(?&LNUM)(?:(?&FRAC))?) # Decimal number 123_456.456
  46. )(?:(?&DNUM)(?:(?&EXPONENT))?) # 123_456.456E+10
  47. /Ax';
  // Double-quote delimiter of a string that is lexed piecewise in STATE_STRING.
  48. public const REGEX_DQ_STRING_DELIM = '/"/A';
  // Chunk of a double-quoted string: stops before a '"' or before a '#' that is followed by '{'.
  49. public const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  // Inline comment inside an expression: '#' up to (not including) the next newline.
  50. public const REGEX_INLINE_COMMENT = '/#[^\n]*/A';
  // Single characters that lexExpression() emits as punctuation tokens.
  51. public const PUNCTUATION = '()[]{}?:.,|';
  // Escape letter => control character, consulted first by stripcslashes().
  52. private const SPECIAL_CHARS = [
  53. 'f' => "\f",
  54. 'n' => "\n",
  55. 'r' => "\r",
  56. 't' => "\t",
  57. 'v' => "\v",
  58. ];
  /**
   * Stores the environment and resolves the lexer options.
   *
   * Options supplied by the caller replace the built-in defaults key by key.
   *
   * @param Environment $env     Environment whose unary/binary operators are read when the operator regex is built
   * @param array       $options Overrides for the tag delimiters, whitespace-control markers and interpolation delimiters
   */
  public function __construct(Environment $env, array $options = [])
  {
      $defaults = [
          'tag_comment' => ['{#', '#}'],
          'tag_block' => ['{%', '%}'],
          'tag_variable' => ['{{', '}}'],
          'whitespace_trim' => '-',
          'whitespace_line_trim' => '~',
          'whitespace_line_chars' => ' \t\0\x0B',
          'interpolation' => ['#{', '}'],
      ];

      $this->options = array_merge($defaults, $options);
      $this->env = $env;
  }
  /**
   * Builds, once per instance, every regular expression that depends on the configured delimiters.
   *
   * All delimiter options are passed through preg_quote() before they are
   * spliced into a pattern. This includes the whitespace-control markers in
   * the "lex_raw_data" pattern, which were previously inserted verbatim: a
   * custom marker such as "+" or "*" then produced a broken or wrong
   * "endverbatim" pattern, although the same marker was quoted correctly in
   * "lex_tokens_start".
   *
   * The multi-line patterns use the "x" modifier, so the whitespace and line
   * breaks inside their literals are not part of the expression.
   */
  private function initialize(): void
  {
      if ($this->isInitialized) {
          return;
      }

      // preg_quote() escapes character by character, so quoted fragments can be
      // concatenated freely. '#' is requested explicitly because "#}" must stay
      // literal under the "x" modifier (PHP >= 7.3 also quotes it by default).
      $trim = preg_quote($this->options['whitespace_trim'], '#');
      $lineTrim = preg_quote($this->options['whitespace_line_trim'], '#');

      $variableStart = preg_quote($this->options['tag_variable'][0], '#');
      $variableEnd = preg_quote($this->options['tag_variable'][1], '#');
      $blockStart = preg_quote($this->options['tag_block'][0], '#');
      $blockEnd = preg_quote($this->options['tag_block'][1], '#');
      $commentStart = preg_quote($this->options['tag_comment'][0], '#');
      $commentEnd = preg_quote($this->options['tag_comment'][1], '#');

      // Not quoted on purpose: the option is the body of a character class and
      // its default value (' \t\0\x0B') relies on PCRE interpreting the escapes.
      $lineChars = '['.$this->options['whitespace_line_chars'].']*';

      $this->regexes = [
          // }}
          'lex_var' => '{
              \s*
              (?:'.
                  $trim.$variableEnd.'\s*'. // -}}\s*
                  '|'.
                  $lineTrim.$variableEnd.$lineChars. // ~}}[ \t\0\x0B]*
                  '|'.
                  $variableEnd. // }}
              ')
          }Ax',

          // %}
          'lex_block' => '{
              \s*
              (?:'.
                  $trim.$blockEnd.'\s*\n?'. // -%}\s*\n?
                  '|'.
                  $lineTrim.$blockEnd.$lineChars. // ~%}[ \t\0\x0B]*
                  '|'.
                  $blockEnd.'\n?'. // %}\n?
              ')
          }Ax',

          // {% endverbatim %}
          'lex_raw_data' => '{'.
              $blockStart. // {%
              '('.
                  $trim. // - (quoted, see the method description)
                  '|'.
                  $lineTrim. // ~ (quoted, see the method description)
              ')?\s*endverbatim\s*'.
              '(?:'.
                  $trim.$blockEnd.'\s*'. // -%}
                  '|'.
                  $lineTrim.$blockEnd.$lineChars. // ~%}[ \t\0\x0B]*
                  '|'.
                  $blockEnd. // %}
              ')
          }sx',

          'operator' => $this->getOperatorRegex(),

          // #}
          'lex_comment' => '{
              (?:'.
                  $trim.$commentEnd.'\s*\n?'. // -#}\s*\n?
                  '|'.
                  $lineTrim.$commentEnd.$lineChars. // ~#}[ \t\0\x0B]*
                  '|'.
                  $commentEnd.'\n?'. // #}\n?
              ')
          }sx',

          // verbatim %}
          'lex_block_raw' => '{
              \s*verbatim\s*
              (?:'.
                  $trim.$blockEnd.'\s*'. // -%}\s*
                  '|'.
                  $lineTrim.$blockEnd.$lineChars. // ~%}[ \t\0\x0B]*
                  '|'.
                  $blockEnd. // %}
              ')
          }Asx',

          // line 42 %}  (no "x" modifier: this literal must stay on one line)
          'lex_block_line' => '{\s*line\s+(\d+)\s*'.$blockEnd.'}As',

          // {{ or {% or {#
          'lex_tokens_start' => '{
              ('.
                  $variableStart. // {{
                  '|'.
                  $blockStart. // {%
                  '|'.
                  $commentStart. // {#
              ')('.
                  $trim. // -
                  '|'.
                  $lineTrim. // ~
              ')?
          }sx',

          // no "x" modifier on the two interpolation patterns either
          'interpolation_start' => '{'.preg_quote($this->options['interpolation'][0], '#').'\s*}A',
          'interpolation_end' => '{\s*'.preg_quote($this->options['interpolation'][1], '#').'}A',
      ];

      $this->isInitialized = true;
  }
  /**
   * Converts a template source into a token stream.
   *
   * Line endings are normalised to "\n", every tag start is located in a
   * single pass, and the state-specific lexing methods are then called until
   * the whole code has been consumed. An EOF token is always appended.
   *
   * @param Source $source The template source to tokenize
   *
   * @return TokenStream The tokens, bound to the given source
   *
   * @throws SyntaxError When a bracket, string or interpolation is still open at the end of the code
   */
  public function tokenize(Source $source): TokenStream
  {
      $this->initialize();

      // per-run state
      $this->source = $source;
      $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
      $this->end = \strlen($this->code);
      $this->cursor = 0;
      $this->lineno = 1;
      $this->position = -1;
      $this->state = self::STATE_DATA;
      $this->states = [];
      $this->brackets = [];
      $this->tokens = [];

      // locate every tag start up front; lexData() walks through them
      preg_match_all($this->regexes['lex_tokens_start'], $this->code, $tagStarts, \PREG_OFFSET_CAPTURE);
      $this->positions = $tagStarts;

      while ($this->cursor < $this->end) {
          // hand over to the lexing method that belongs to the current state
          match ($this->state) {
              self::STATE_DATA => $this->lexData(),
              self::STATE_BLOCK => $this->lexBlock(),
              self::STATE_VAR => $this->lexVar(),
              self::STATE_STRING => $this->lexString(),
              self::STATE_INTERPOLATION => $this->lexInterpolation(),
              default => null,
          };
      }

      $this->pushToken(Token::EOF_TYPE);

      if ([] !== $this->brackets) {
          [$opener, $openedAt] = array_pop($this->brackets);

          throw new SyntaxError(\sprintf('Unclosed "%s".', $opener), $openedAt, $this->source);
      }

      return new TokenStream($this->tokens, $this->source);
  }
  /**
   * Lexes plain template text up to the next tag start, then enters that tag.
   *
   * Without a remaining tag start the rest of the code becomes one text token.
   * Otherwise the text before the tag is emitted (right-trimmed when the tag
   * carries a whitespace-control marker) and the tag is handled: comments are
   * skipped, "verbatim" and "line" blocks are processed immediately, and any
   * other block or variable tag switches the lexer state.
   */
  private function lexData(): void
  {
      $tagStarts = $this->positions[0];
      $lastIndex = \count($tagStarts) - 1;

      // no tag start left: everything that remains is text
      if ($lastIndex === $this->position) {
          $this->pushToken(Token::TEXT_TYPE, substr($this->code, $this->cursor));
          $this->cursor = $this->end;

          return;
      }

      // skip tag starts that lie before the cursor (already consumed as part of something else)
      while (true) {
          $tagStart = $tagStarts[++$this->position];
          if ($tagStart[1] >= $this->cursor) {
              break;
          }
          if ($lastIndex === $this->position) {
              return;
          }
      }

      [$tagText, $tagOffset] = $tagStart;
      $rawText = substr($this->code, $this->cursor, $tagOffset - $this->cursor);
      $text = $rawText;

      // whitespace control on the opening tag trims the preceding text
      if (isset($this->positions[2][$this->position][0])) {
          $marker = $this->positions[2][$this->position][0];

          if ($this->options['whitespace_trim'] === $marker) {
              // {%-, {{- or {#-
              $text = rtrim($rawText);
          } elseif ($this->options['whitespace_line_trim'] === $marker) {
              // {%~, {{~ or {#~ : \r and \n are kept
              $text = rtrim($rawText, " \t\0\x0B");
          }
      }

      $this->pushToken(Token::TEXT_TYPE, $text);
      // the cursor moves past the untrimmed text and the tag start itself
      $this->moveCursor($rawText.$tagText);

      switch ($this->positions[1][$this->position][0]) {
          case $this->options['tag_comment'][0]:
              $this->lexComment();

              break;

          case $this->options['tag_block'][0]:
              if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                  // {% verbatim %}
                  $this->moveCursor($match[0]);
                  $this->lexRawData();
              } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                  // {% line \d+ %}
                  $this->moveCursor($match[0]);
                  $this->lineno = (int) $match[1];
              } else {
                  $this->pushToken(Token::BLOCK_START_TYPE);
                  $this->pushState(self::STATE_BLOCK);
                  $this->currentVarBlockLine = $this->lineno;
              }

              break;

          case $this->options['tag_variable'][0]:
              $this->pushToken(Token::VAR_START_TYPE);
              $this->pushState(self::STATE_VAR);
              $this->currentVarBlockLine = $this->lineno;

              break;
      }
  }
  /**
   * Lexes inside a block tag: closes the block when its end delimiter is next
   * and no bracket is open, otherwise lexes one expression element.
   */
  private function lexBlock(): void
  {
      if ($this->brackets || !preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
          $this->lexExpression();

          return;
      }

      // the end token is recorded before the cursor (and line counter) moves
      $this->pushToken(Token::BLOCK_END_TYPE);
      $this->moveCursor($match[0]);
      $this->popState();
  }
  /**
   * Lexes inside a variable tag: closes the tag when its end delimiter is next
   * and no bracket is open, otherwise lexes one expression element.
   */
  private function lexVar(): void
  {
      if ($this->brackets || !preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
          $this->lexExpression();

          return;
      }

      // the end token is recorded before the cursor (and line counter) moves
      $this->pushToken(Token::VAR_END_TYPE);
      $this->moveCursor($match[0]);
      $this->popState();
  }
  /**
   * Lexes a single expression element at the cursor.
   *
   * Leading whitespace is skipped first. The candidates are then tried in a
   * fixed order, and the first one that applies is consumed: spread operator,
   * arrow, operator, name, number, punctuation, complete string literal,
   * opening double quote of an interpolated string, inline comment.
   *
   * @throws SyntaxError When the code ends after whitespace, a closing bracket has no or the
   *                     wrong opening counterpart, or no candidate applies
   */
  private function lexExpression(): void
  {
      // whitespace
      if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
          $this->moveCursor($match[0]);

          if ($this->cursor >= $this->end) {
              throw new SyntaxError(\sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
          }
      }

      $char = $this->code[$this->cursor];

      // spread operator
      if ('...' === substr($this->code, $this->cursor, 3)) {
          $this->pushToken(Token::SPREAD_TYPE, '...');
          $this->moveCursor('...');

          return;
      }

      // arrow function
      if ('=>' === substr($this->code, $this->cursor, 2)) {
          $this->pushToken(Token::ARROW_TYPE, '=>');
          $this->moveCursor('=>');

          return;
      }

      // operators (inner whitespace runs are collapsed to one space in the token value)
      if (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
          $this->pushToken(Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $match[0]));
          $this->moveCursor($match[0]);

          return;
      }

      // names
      if (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
          $this->pushToken(Token::NAME_TYPE, $match[0]);
          $this->moveCursor($match[0]);

          return;
      }

      // numbers ("0 +" turns the digit string into an int or a float)
      if (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
          $this->pushToken(Token::NUMBER_TYPE, 0 + str_replace('_', '', $match[0]));
          $this->moveCursor($match[0]);

          return;
      }

      // punctuation
      if (str_contains(self::PUNCTUATION, $char)) {
          if (str_contains('([{', $char)) {
              // opening bracket: remember it together with its line
              $this->brackets[] = [$char, $this->lineno];
          } elseif (str_contains(')]}', $char)) {
              // closing bracket: must pair with the most recent opener
              if (!$this->brackets) {
                  throw new SyntaxError(\sprintf('Unexpected "%s".', $char), $this->lineno, $this->source);
              }

              [$opener, $openedAt] = array_pop($this->brackets);
              if ($char !== strtr($opener, '([{', ')]}')) {
                  throw new SyntaxError(\sprintf('Unclosed "%s".', $opener), $openedAt, $this->source);
              }
          }

          $this->pushToken(Token::PUNCTUATION_TYPE, $char);
          ++$this->cursor;

          return;
      }

      // strings (the quotes are stripped, escapes are resolved)
      if (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
          $literal = $match[0];
          $this->pushToken(Token::STRING_TYPE, $this->stripcslashes(substr($literal, 1, -1), substr($literal, 0, 1)));
          $this->moveCursor($literal);

          return;
      }

      // opening double quoted string
      if (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
          $this->brackets[] = ['"', $this->lineno];
          $this->pushState(self::STATE_STRING);
          $this->moveCursor($match[0]);

          return;
      }

      // inline comment
      if (preg_match(self::REGEX_INLINE_COMMENT, $this->code, $match, 0, $this->cursor)) {
          $this->moveCursor($match[0]);

          return;
      }

      // unlexable
      throw new SyntaxError(\sprintf('Unexpected character "%s".', $char), $this->lineno, $this->source);
  }
  /**
   * Resolves the backslash escapes of a string literal.
   *
   * Handled escapes: the letters listed in SPECIAL_CHARS, "\\", quotes, "\#{",
   * "\x" followed by one or two hex digits, and one to three octal digits.
   * Any other escaped character is kept without its backslash and reported
   * through trigger_deprecation(); so is an escaped quote that differs from
   * the quote delimiting the string. A lone trailing backslash is kept as is.
   *
   * @param string $str       String content without its surrounding quotes
   * @param string $quoteType The quote character that delimits the string
   *
   * @return string The content with its escapes resolved
   */
  private function stripcslashes(string $str, string $quoteType): string
  {
      $notice = 'Character "%s" should not be escaped; the "\" character is ignored in Twig 3 but will not be in Twig 4. Please remove the extra "\" character at position %d in "%s" at line %d.';

      $decoded = '';
      $size = \strlen($str);

      // the loop increment steps over the last character consumed by an escape
      for ($i = 0; $i < $size; ++$i) {
          $slashAt = strpos($str, '\\', $i);
          if (false === $slashAt) {
              // no escape left: copy the remainder
              $decoded .= substr($str, $i);

              break;
          }

          // copy what precedes the backslash, then look at the escaped character
          $decoded .= substr($str, $i, $slashAt - $i);
          $i = $slashAt + 1;
          if ($i >= $size) {
              $decoded .= '\\';

              break;
          }

          $escaped = $str[$i];
          $hasFollower = $i + 1 < $size;

          if (isset(self::SPECIAL_CHARS[$escaped])) {
              $decoded .= self::SPECIAL_CHARS[$escaped];
          } elseif ('\\' === $escaped) {
              $decoded .= '\\';
          } elseif ("'" === $escaped || '"' === $escaped) {
              if ($quoteType !== $escaped) {
                  trigger_deprecation('twig/twig', '3.12', $notice, $escaped, $i + 1, $this->source->getName(), $this->lineno);
              }
              $decoded .= $escaped;
          } elseif ('#' === $escaped && $hasFollower && '{' === $str[$i + 1]) {
              $decoded .= '#{';
              ++$i;
          } elseif ('x' === $escaped && $hasFollower && ctype_xdigit($str[$i + 1])) {
              // one hex digit is mandatory, a second one is optional
              $digits = $str[++$i];
              if ($i + 1 < $size && ctype_xdigit($str[$i + 1])) {
                  $digits .= $str[++$i];
              }
              $decoded .= \chr(hexdec($digits));
          } elseif (ctype_digit($escaped) && $escaped < '8') {
              // up to three octal digits
              $digits = $escaped;
              while (\strlen($digits) < 3 && $i + 1 < $size && ctype_digit($str[$i + 1]) && $str[$i + 1] < '8') {
                  $digits .= $str[++$i];
              }
              $decoded .= \chr(octdec($digits));
          } else {
              trigger_deprecation('twig/twig', '3.12', $notice, $escaped, $i + 1, $this->source->getName(), $this->lineno);
              $decoded .= $escaped;
          }
      }

      return $decoded;
  }
  /**
   * Consumes the body of a verbatim block up to and including its end tag and
   * emits the body as one text token.
   *
   * A whitespace-control marker on the end tag right-trims the body.
   *
   * @throws SyntaxError When no end tag follows the cursor
   */
  private function lexRawData(): void
  {
      $found = preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor);
      if (!$found) {
          throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
      }

      [$endTag, $endTagOffset] = $match[0];
      $body = substr($this->code, $this->cursor, $endTagOffset - $this->cursor);

      // the cursor moves past the untrimmed body and the end tag
      $this->moveCursor($body.$endTag);

      // trim?
      if (isset($match[1][0])) {
          $body = $this->options['whitespace_trim'] === $match[1][0]
              ? rtrim($body) // whitespace_trim marker
              : rtrim($body, " \t\0\x0B"); // whitespace_line_trim marker: \r and \n are kept
      }

      $this->pushToken(Token::TEXT_TYPE, $body);
  }
  /**
   * Skips a comment: moves the cursor past the comment end delimiter without
   * emitting a token.
   *
   * @throws SyntaxError When no comment end delimiter follows the cursor
   */
  private function lexComment(): void
  {
      $found = preg_match($this->regexes['lex_comment'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor);
      if (!$found) {
          throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
      }

      [$endTag, $endTagOffset] = $match[0];
      $commentBody = substr($this->code, $this->cursor, $endTagOffset - $this->cursor);

      $this->moveCursor($commentBody.$endTag);
  }
  /**
   * Lexes the next piece of a double-quoted string: the start of an
   * interpolation, a non-empty chunk of literal text, or the closing quote.
   *
   * @throws SyntaxError When none of the three can be lexed at the cursor
   */
  private function lexString(): void
  {
      // interpolation start
      if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
          $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
          $this->pushToken(Token::INTERPOLATION_START_TYPE);
          $this->moveCursor($match[0]);
          $this->pushState(self::STATE_INTERPOLATION);

          return;
      }

      // literal text (the pattern may match the empty string, which is ignored)
      if (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && '' !== $match[0]) {
          $this->pushToken(Token::STRING_TYPE, $this->stripcslashes($match[0], '"'));
          $this->moveCursor($match[0]);

          return;
      }

      // anything but a closing quote is unlexable here
      if (!preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
          throw new SyntaxError(\sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
      }

      // closing quote
      [$opener, $openedAt] = array_pop($this->brackets);
      if ('"' !== $this->code[$this->cursor]) {
          throw new SyntaxError(\sprintf('Unclosed "%s".', $opener), $openedAt, $this->source);
      }

      $this->popState();
      ++$this->cursor;
  }
  /**
   * Lexes inside a string interpolation: closes it when the interpolation
   * opener is the innermost open bracket and the end delimiter is next,
   * otherwise lexes one expression element.
   */
  private function lexInterpolation(): void
  {
      $innermost = end($this->brackets);

      $closesHere = $this->options['interpolation'][0] === $innermost[0]
          && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor);

      if (!$closesHere) {
          $this->lexExpression();

          return;
      }

      array_pop($this->brackets);
      $this->pushToken(Token::INTERPOLATION_END_TYPE);
      $this->moveCursor($match[0]);
      $this->popState();
  }
  /**
   * Appends a token that carries the current line number.
   *
   * Text tokens with an empty value are dropped.
   *
   * @param mixed $type  Token type (one of the Token::*_TYPE constants at every call site in this class)
   * @param mixed $value Token value
   */
  private function pushToken($type, $value = ''): void
  {
      $isEmptyText = Token::TEXT_TYPE === $type && '' === $value;

      if (!$isEmptyText) {
          $this->tokens[] = new Token($type, $value, $this->lineno);
      }
  }
  /**
   * Advances the cursor past the given text and counts the lines it spans.
   *
   * @param string $text The text that has just been consumed
   */
  private function moveCursor($text): void
  {
      $this->lineno += substr_count($text, "\n");
      $this->cursor += \strlen($text);
  }
  /**
   * Builds the anchored regular expression that recognises "=" and every unary
   * and binary operator of the environment.
   *
   * The operators are ordered by decreasing length before they are joined into
   * one alternation.
   *
   * @return string The operator pattern
   */
  private function getOperatorRegex(): string
  {
      $symbols = array_merge(
          ['='],
          array_keys($this->env->getUnaryOperators()),
          array_keys($this->env->getBinaryOperators())
      );

      // operator => length, longest first
      $lengths = [];
      foreach ($symbols as $symbol) {
          $lengths[$symbol] = \strlen($symbol);
      }
      arsort($lengths);

      $alternatives = [];
      foreach ($lengths as $operator => $length) {
          $pattern = preg_quote($operator, '/');

          // an operator that ends with a character must be followed by
          // a whitespace, a parenthesis, an opening map [ or sequence {
          if (ctype_alpha($operator[$length - 1])) {
              $pattern .= '(?=[\s()\[{])';
          }

          // an operator that begins with a character must not have a dot or pipe before
          if (ctype_alpha($operator[0])) {
              $pattern = '(?<![\.\|])'.$pattern;
          }

          // an operator with a space can be any amount of whitespaces
          $alternatives[] = preg_replace('/\s+/', '\s+', $pattern);
      }

      return '/'.implode('|', $alternatives).'/A';
  }
  /**
   * Enters a new lexer state and remembers the current one for popState().
   *
   * @param int $state One of the STATE_* constants
   */
  private function pushState($state): void
  {
      $previous = $this->state;

      $this->states[] = $previous;
      $this->state = $state;
  }
  /**
   * Returns to the state that was active before the last pushState().
   *
   * @throws \LogicException When no state has been saved
   */
  private function popState(): void
  {
      if (\count($this->states) < 1) {
          throw new \LogicException('Cannot pop state without a previous state.');
      }

      $restored = array_pop($this->states);
      $this->state = $restored;
  }
  514. }
  515. }