|
4 | 4 |
|
5 | 5 | use PhpParser\Parser\Tokens;
|
6 | 6 |
|
7 |
| -/** |
8 |
| - * ATTENTION: This code is WRITE-ONLY. Do not try to read it. |
9 |
| - */ |
10 | 7 | class Emulative extends \PhpParser\Lexer
|
11 | 8 | {
|
12 | 9 | protected $newKeywords;
|
@@ -56,128 +53,103 @@ public function __construct(array $options = array()) {
|
public function startLexing($code) {
    // Reset the object-access flag before lexing a new piece of code.
    $this->inObjectAccess = false;

    // Lex the code exactly as given; no textual preprocessing is applied,
    // so offsets into $code are not shifted by this method.
    parent::startLexing($code);

    // Nothing to patch up when the code cannot contain emulated syntax.
    if (!$this->requiresEmulation($code)) {
        return;
    }

    // Rewrite the token stream in place (presumably $this->tokens was
    // filled by the parent call above -- confirm against the parent lexer).
    $this->emulateTokens();
}
|
69 | 61 |
|
/*
 * Checks if the code is potentially using features that require emulation.
 *
 * This is a purely textual pre-check on the raw source: it may report true
 * for sequences that merely occur inside strings or comments. That is fine,
 * a positive answer only causes emulateTokens() to inspect the real tokens.
 *
 * Takes the source code about to be lexed (string $code) and returns a bool:
 * true if the token stream should be passed through emulateTokens().
 */
protected function requiresEmulation($code) {
    // Running on (at least) the version compared against self::PHP_7_0:
    // nothing checked below needs emulation.
    if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
        return false;
    }

    // "??", "<=>" and "yield from". The match is case-insensitive because
    // emulateTokens() also accepts "from" in any letter case.
    if (1 === preg_match('(\?\?|<=>|yield[ \n\r\t]+from)i', $code)) {
        return true;
    }

    // Below self::PHP_7_0 but at least self::PHP_5_6: the remaining
    // sequences do not need emulation.
    if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
        return false;
    }

    // "..." and "**" / "**=". The lookarounds keep "/**" and "**/" (comment
    // markers) from matching. Parentheses are used as pattern delimiters
    // because the pattern itself contains "/"; with "/" as the delimiter
    // the pattern would end at the first inner slash and fail to compile.
    return 1 === preg_match('(\.\.\.|(?<!/)\*\*(?!/))', $code);
}
|
100 | 80 |
|
/*
 * Emulates tokens for newer PHP versions.
 *
 * Walks $this->tokens and merges adjacent tokens that together spell a
 * newer operator or keyword into one array token of the form
 * array(id, text, line):
 *
 *   "?" "?"                     => self::T_COALESCE
 *   T_IS_SMALLER_OR_EQUAL ">"   => self::T_SPACESHIP
 *   "*" "*"                     => self::T_POW
 *   "*" T_MUL_EQUAL             => self::T_POW_EQUAL
 *   T_YIELD T_WHITESPACE "from" => self::T_YIELD_FROM
 *   "." "." "."                 => self::T_ELLIPSIS
 *
 * The token array is modified in place; nothing is returned.
 */
protected function emulateTokens() {
    // We need to manually iterate and manage a count because we'll change
    // the tokens array on the way
    // $line is the line assigned to merged tokens. It starts at 1 and is
    // advanced by the number of "\n" in every array token passed so far.
    $line = 1;
    for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
        // Two-token sequences. Each merge shrinks the array by one.
        if (isset($this->tokens[$i + 1])) {
            if ($this->tokens[$i] === '?' && $this->tokens[$i + 1] === '?') {
                array_splice($this->tokens, $i, 2, array(
                    array(self::T_COALESCE, '??', $line)
                ));
                $c--;
                continue;
            }
            if ($this->tokens[$i][0] === T_IS_SMALLER_OR_EQUAL
                && $this->tokens[$i + 1] === '>'
            ) {
                array_splice($this->tokens, $i, 2, array(
                    array(self::T_SPACESHIP, '<=>', $line)
                ));
                $c--;
                continue;
            }
            if ($this->tokens[$i] === '*' && $this->tokens[$i + 1] === '*') {
                array_splice($this->tokens, $i, 2, array(
                    array(self::T_POW, '**', $line)
                ));
                $c--;
                continue;
            }
            if ($this->tokens[$i] === '*' && $this->tokens[$i + 1][0] === T_MUL_EQUAL) {
                array_splice($this->tokens, $i, 2, array(
                    array(self::T_POW_EQUAL, '**=', $line)
                ));
                $c--;
                continue;
            }
        }

        // Three-token sequences. Each merge shrinks the array by two.
        if (isset($this->tokens[$i + 2])) {
            if ($this->tokens[$i][0] === T_YIELD && $this->tokens[$i + 1][0] === T_WHITESPACE
                && $this->tokens[$i + 2][0] === T_STRING
                && !strcasecmp($this->tokens[$i + 2][1], 'from')
            ) {
                // Keep the original text (including the exact whitespace)
                // as the content of the merged token.
                array_splice($this->tokens, $i, 3, array(
                    array(
                        self::T_YIELD_FROM,
                        $this->tokens[$i][1] . $this->tokens[$i + 1][1] . $this->tokens[$i + 2][1],
                        $line
                    )
                ));
                $c -= 2;
                // The merged token may span lines; "continue" skips the
                // generic line accounting below, so do it here.
                $line += substr_count($this->tokens[$i][1], "\n");
                continue;
            }
            if ($this->tokens[$i] === '.' && $this->tokens[$i + 1] === '.'
                && $this->tokens[$i + 2] === '.'
            ) {
                array_splice($this->tokens, $i, 3, array(
                    array(self::T_ELLIPSIS, '...', $line)
                ));
                $c -= 2;
                continue;
            }
        }

        // Token left untouched: account for the newlines it contains.
        // Only array tokens carry text at index 1.
        if (\is_array($this->tokens[$i])) {
            $line += substr_count($this->tokens[$i][1], "\n");
        }
    }
}
|
183 | 155 |
|
|
0 commit comments