Skip to content

Commit 3f718ee

Browse files
TomasVotruba authored and nikic committed
[PHP 7.4] Add support for numeric literal separators (nikic#615)
Implements RFC https://wiki.php.net/rfc/numeric_literal_separator. Closes nikic#614.
1 parent b9b45dd commit 3f718ee

File tree

7 files changed

+365
-13
lines changed

7 files changed

+365
-13
lines changed

lib/PhpParser/Lexer/Emulative.php

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
use PhpParser\Lexer;
88
use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator;
99
use PhpParser\Lexer\TokenEmulator\FnTokenEmulator;
10+
use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator;
1011
use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface;
1112
use PhpParser\Parser\Tokens;
1213

@@ -39,6 +40,7 @@ public function __construct(array $options = [])
3940

4041
$this->tokenEmulators[] = new FnTokenEmulator();
4142
$this->tokenEmulators[] = new CoaleseEqualTokenEmulator();
43+
$this->tokenEmulators[] = new NumericLiteralSeparatorEmulator();
4244

4345
$this->tokenMap[self::T_COALESCE_EQUAL] = Tokens::T_COALESCE_EQUAL;
4446
$this->tokenMap[self::T_FN] = Tokens::T_FN;
@@ -58,14 +60,6 @@ public function startLexing(string $code, ErrorHandler $errorHandler = null) {
5860
// 1. emulation of heredoc and nowdoc new syntax
5961
$preparedCode = $this->processHeredocNowdoc($code);
6062
parent::startLexing($preparedCode, $collector);
61-
62-
// add token emulation
63-
foreach ($this->tokenEmulators as $emulativeToken) {
64-
if ($emulativeToken->isEmulationNeeded($code)) {
65-
$this->tokens = $emulativeToken->emulate($code, $this->tokens);
66-
}
67-
}
68-
6963
$this->fixupTokens();
7064

7165
$errors = $collector->getErrors();
@@ -75,6 +69,13 @@ public function startLexing(string $code, ErrorHandler $errorHandler = null) {
7569
$errorHandler->handleError($error);
7670
}
7771
}
72+
73+
// add token emulation
74+
foreach ($this->tokenEmulators as $emulativeToken) {
75+
if ($emulativeToken->isEmulationNeeded($code)) {
76+
$this->tokens = $emulativeToken->emulate($code, $this->tokens);
77+
}
78+
}
7879
}
7980

8081
private function isHeredocNowdocEmulationNeeded(string $code): bool
lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace PhpParser\Lexer\TokenEmulator;
4+
5+
use PhpParser\Lexer\Emulative;
6+
7+
/**
 * Emulates PHP 7.4 numeric literal separators (e.g. `1_000`, `0xCAFE_F00D`)
 * on older PHP versions.
 *
 * The emulator works on an already tokenized source: wherever it finds a
 * number token it re-reads the number from the original code with a regex
 * that understands `_` separators and, if that number is longer than the
 * token, merges the following tokens into a single number token.
 */
final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
{
    // Regex fragments for the supported literal forms. Each digit group may
    // be followed by further groups separated by a single underscore.
    const BIN = '(?:0b[01]+(?:_[01]+)*)';
    const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
    const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
    // "1.", "1.5" or ".5"
    const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
    // Exponent part with optional sign, e.g. "e+10"
    const EXP = '(?:e[+-]?' . self::DEC . ')';
    const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
    // Complete pattern: case-insensitive (i) and anchored at the match
    // offset (A). Floats are tried first so that "1_0e+10" is not cut short
    // by the plain decimal alternative.
    const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';

    /**
     * Decides whether the given code has to go through emulate().
     *
     * This is a cheap pre-check only: it looks for "<hex digit>_<hex digit>"
     * anywhere in the code, so it can also fire for identifiers such as
     * `a_b`. emulate() only ever touches number tokens, so such false
     * positives cost time but do not change the result.
     *
     * @param string $code Source code that is being lexed
     *
     * @return bool False on PHP versions that support the syntax natively or
     *              when the code contains no separator candidate
     */
    public function isEmulationNeeded(string $code) : bool
    {
        // skip version where this is supported
        if (version_compare(\PHP_VERSION, Emulative::PHP_7_4, '>=')) {
            return false;
        }

        // preg_match() returns 1 on a match, 0 on no match and false only on
        // error. Comparing with "!== false" would therefore report that
        // emulation is needed for practically every input.
        return preg_match('~[0-9a-f]_[0-9a-f]~i', $code) === 1;
    }

    /**
     * Merges the token sequences that older lexers produce for a separated
     * number (e.g. T_LNUMBER "1" + T_STRING "_000") into one number token.
     *
     * The position inside $code is tracked by summing up the lengths of the
     * tokens seen so far, i.e. this assumes that the concatenated token texts
     * reproduce $code.
     *
     * @param string $code   Source code the tokens were produced from
     * @param array  $tokens Tokens as string or [id, text, line] entries
     *
     * @return array Token list with separated numbers merged
     */
    public function emulate(string $code, array $tokens): array
    {
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        $codeOffset = 0;
        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
            $token = $tokens[$i];
            $tokenLen = \strlen(\is_array($token) ? $token[1] : $token);

            // Anything that is not a number token is only used to advance
            // the offset into the code.
            if ($token[0] !== T_LNUMBER && $token[0] !== T_DNUMBER) {
                $codeOffset += $tokenLen;
                continue;
            }

            // Re-read the number at this position with separator support.
            $res = preg_match(self::NUMBER, $code, $matches, 0, $codeOffset);
            assert($res, "No number at number token position");

            $match = $matches[0];
            $matchLen = \strlen($match);
            if ($matchLen === $tokenLen) {
                // Original token already holds the full number.
                $codeOffset += $tokenLen;
                continue;
            }

            // The merged token keeps the line number of the first token.
            $tokenKind = $this->resolveIntegerOrFloatToken($match);
            $newTokens = [[$tokenKind, $match, $token[2]]];

            // Count how many original tokens are covered by the match.
            $numTokens = 1;
            $len = $tokenLen;
            while ($matchLen > $len) {
                $nextToken = $tokens[$i + $numTokens];
                $nextTokenText = \is_array($nextToken) ? $nextToken[1] : $nextToken;
                $nextTokenLen = \strlen($nextTokenText);

                $numTokens++;
                if ($matchLen < $len + $nextTokenLen) {
                    // Split trailing characters into a partial token.
                    assert(is_array($nextToken), "Partial token should be an array token");
                    $partialText = substr($nextTokenText, $matchLen - $len);
                    $newTokens[] = [$nextToken[0], $partialText, $nextToken[2]];
                    break;
                }

                $len += $nextTokenLen;
            }

            // Replace the covered tokens and shrink the loop bound by the
            // number of tokens that disappeared.
            array_splice($tokens, $i, $numTokens, $newTokens);
            $c -= $numTokens - \count($newTokens);
            $codeOffset += $matchLen;
        }

        return $tokens;
    }

    /**
     * Determines the token id for a numeric literal with separators.
     *
     * The underscores are removed and the literal is converted according to
     * its prefix. If the conversion yields a float (which includes integer
     * literals too large for an int) the literal is a T_DNUMBER, otherwise a
     * T_LNUMBER.
     *
     * @param string $str Numeric literal, possibly containing underscores
     *
     * @return int T_DNUMBER or T_LNUMBER
     */
    private function resolveIntegerOrFloatToken(string $str): int
    {
        $str = str_replace('_', '', $str);

        if (stripos($str, '0b') === 0) {
            $num = bindec($str);
        } elseif (stripos($str, '0x') === 0) {
            $num = hexdec($str);
        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
            // Leading zero and digits only: octal literal
            $num = octdec($str);
        } else {
            // Unary plus converts the numeric string to int or float
            $num = +$str;
        }

        return is_float($num) ? T_DNUMBER : T_LNUMBER;
    }
}

lib/PhpParser/Node/Scalar/DNumber.php

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ public function getSubNodeNames() : array {
3434
* @return float The parsed number
3535
*/
3636
public static function parse(string $str) : float {
37+
$str = str_replace('_', '', $str);
38+
3739
// if string contains any of .eE just cast it to float
3840
if (false !== strpbrk($str, '.eE')) {
3941
return (float) $str;

lib/PhpParser/Node/Scalar/LNumber.php

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,8 @@ public function getSubNodeNames() : array {
4141
* @return LNumber The constructed LNumber, including kind attribute
4242
*/
4343
public static function fromString(string $str, array $attributes = [], bool $allowInvalidOctal = false) : LNumber {
44+
$str = str_replace('_', '', $str);
45+
4446
if ('0' !== $str[0] || '0' === $str) {
4547
$attributes['kind'] = LNumber::KIND_DEC;
4648
return new LNumber((int) $str, $attributes);

test/PhpParser/Lexer/EmulativeTest.php

Lines changed: 53 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -123,10 +123,6 @@ public function testErrorAfterEmulation($code) {
123123

124124
public function provideTestLexNewFeatures() {
125125
return [
126-
// PHP 7.4
127-
['??=', [
128-
[Tokens::T_COALESCE_EQUAL, '??='],
129-
]],
130126
['yield from', [
131127
[Tokens::T_YIELD_FROM, 'yield from'],
132128
]],
@@ -169,7 +165,7 @@ public function provideTestLexNewFeatures() {
169165
[ord(';'), ';'],
170166
]],
171167

172-
// Flexible heredoc/nowdoc
168+
// PHP 7.3: Flexible heredoc/nowdoc
173169
["<<<LABEL\nLABEL,", [
174170
[Tokens::T_START_HEREDOC, "<<<LABEL\n"],
175171
[Tokens::T_END_HEREDOC, "LABEL"],
@@ -205,6 +201,58 @@ public function provideTestLexNewFeatures() {
205201
[Tokens::T_END_HEREDOC, " LABEL"],
206202
[Tokens::T_STRING, "LABEL"],
207203
]],
204+
205+
// PHP 7.4: Null coalesce equal
206+
['??=', [
207+
[Tokens::T_COALESCE_EQUAL, '??='],
208+
]],
209+
210+
// PHP 7.4: Number literal separator
211+
['1_000', [
212+
[Tokens::T_LNUMBER, '1_000'],
213+
]],
214+
['0xCAFE_F00D', [
215+
[Tokens::T_LNUMBER, '0xCAFE_F00D'],
216+
]],
217+
['0b0101_1111', [
218+
[Tokens::T_LNUMBER, '0b0101_1111'],
219+
]],
220+
['0137_041', [
221+
[Tokens::T_LNUMBER, '0137_041'],
222+
]],
223+
['1_000.0', [
224+
[Tokens::T_DNUMBER, '1_000.0'],
225+
]],
226+
['1_0.0', [
227+
[Tokens::T_DNUMBER, '1_0.0']
228+
]],
229+
['1_000_000_000.0', [
230+
[Tokens::T_DNUMBER, '1_000_000_000.0']
231+
]],
232+
['0e1_0', [
233+
[Tokens::T_DNUMBER, '0e1_0']
234+
]],
235+
['1_0e+10', [
236+
[Tokens::T_DNUMBER, '1_0e+10']
237+
]],
238+
['1_0e-10', [
239+
[Tokens::T_DNUMBER, '1_0e-10']
240+
]],
241+
['0b1011010101001010_110101010010_10101101010101_0101101011001_110111100', [
242+
[Tokens::T_DNUMBER, '0b1011010101001010_110101010010_10101101010101_0101101011001_110111100'],
243+
]],
244+
['0xFFFF_FFFF_FFFF_FFFF', [
245+
[Tokens::T_DNUMBER, '0xFFFF_FFFF_FFFF_FFFF'],
246+
]],
247+
['1_000+1', [
248+
[Tokens::T_LNUMBER, '1_000'],
249+
[ord('+'), '+'],
250+
[Tokens::T_LNUMBER, '1'],
251+
]],
252+
['1_0abc', [
253+
[Tokens::T_LNUMBER, '1_0'],
254+
[Tokens::T_STRING, 'abc'],
255+
]],
208256
];
209257
}
210258
}

0 commit comments

Comments
 (0)