Skip to content

Commit 75abbbd

Browse files
committed
Handle flexible heredoc via TokenEmulator
Extend the interface to support preprocessing.
1 parent 39b0460 commit 75abbbd

9 files changed

+128
-111
lines changed

lib/PhpParser/Lexer/Emulative.php

Lines changed: 16 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
use PhpParser\ErrorHandler;
77
use PhpParser\Lexer;
88
use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator;
9+
use PhpParser\Lexer\TokenEmulator\FlexibleDocStringEmulator;
910
use PhpParser\Lexer\TokenEmulator\FnTokenEmulator;
1011
use PhpParser\Lexer\TokenEmulator\MatchTokenEmulator;
1112
use PhpParser\Lexer\TokenEmulator\NullsafeTokenEmulator;
1213
use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator;
1314
use PhpParser\Lexer\TokenEmulator\ReverseEmulator;
14-
use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface;
15+
use PhpParser\Lexer\TokenEmulator\TokenEmulator;
1516
use PhpParser\Parser\Tokens;
1617

1718
class Emulative extends Lexer
@@ -20,16 +21,10 @@ class Emulative extends Lexer
2021
const PHP_7_4 = '7.4dev';
2122
const PHP_8_0 = '8.0dev';
2223

23-
const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
24-
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
25-
(?:.*\r?\n)*?
26-
(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
27-
REGEX;
28-
2924
/** @var mixed[] Patches used to reverse changes introduced in the code */
3025
private $patches = [];
3126

32-
/** @var TokenEmulatorInterface[] */
27+
/** @var TokenEmulator[] */
3328
private $emulators = [];
3429

3530
/** @var string */
@@ -48,6 +43,7 @@ public function __construct(array $options = [])
4843
parent::__construct($options);
4944

5045
$emulators = [
46+
new FlexibleDocStringEmulator(),
5147
new FnTokenEmulator(),
5248
new MatchTokenEmulator(),
5349
new CoaleseEqualTokenEmulator(),
@@ -68,19 +64,23 @@ public function __construct(array $options = [])
6864
}
6965

7066
public function startLexing(string $code, ErrorHandler $errorHandler = null) {
71-
$this->patches = [];
67+
$emulators = array_filter($this->emulators, function($emulator) use($code) {
68+
return $emulator->isEmulationNeeded($code);
69+
});
7270

73-
if ($this->isEmulationNeeded($code) === false) {
71+
if (empty($emulators)) {
7472
// Nothing to emulate, yay
7573
parent::startLexing($code, $errorHandler);
7674
return;
7775
}
7876

79-
$collector = new ErrorHandler\Collecting();
77+
$this->patches = [];
78+
foreach ($emulators as $emulator) {
79+
$code = $emulator->preprocessCode($code, $this->patches);
80+
}
8081

81-
// 1. emulation of heredoc and nowdoc new syntax
82-
$preparedCode = $this->processHeredocNowdoc($code);
83-
parent::startLexing($preparedCode, $collector);
82+
$collector = new ErrorHandler\Collecting();
83+
parent::startLexing($code, $collector);
8484
$this->fixupTokens();
8585

8686
$errors = $collector->getErrors();
@@ -91,10 +91,8 @@ public function startLexing(string $code, ErrorHandler $errorHandler = null) {
9191
}
9292
}
9393

94-
foreach ($this->emulators as $emulator) {
95-
if ($emulator->isEmulationNeeded($code)) {
96-
$this->tokens = $emulator->emulate($code, $this->tokens);
97-
}
94+
foreach ($emulators as $emulator) {
95+
$this->tokens = $emulator->emulate($code, $this->tokens);
9896
}
9997
}
10098

@@ -108,71 +106,6 @@ private function isReverseEmulationNeeded(string $emulatorPhpVersion): bool {
108106
&& version_compare($this->targetPhpVersion, $emulatorPhpVersion, '<');
109107
}
110108

111-
private function isHeredocNowdocEmulationNeeded(string $code): bool
112-
{
113-
if (!$this->isForwardEmulationNeeded(self::PHP_7_3)) {
114-
return false;
115-
}
116-
117-
return strpos($code, '<<<') !== false;
118-
}
119-
120-
private function processHeredocNowdoc(string $code): string
121-
{
122-
if ($this->isHeredocNowdocEmulationNeeded($code) === false) {
123-
return $code;
124-
}
125-
126-
if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
127-
// No heredoc/nowdoc found
128-
return $code;
129-
}
130-
131-
// Keep track of how much we need to adjust string offsets due to the modifications we
132-
// already made
133-
$posDelta = 0;
134-
foreach ($matches as $match) {
135-
$indentation = $match['indentation'][0];
136-
$indentationStart = $match['indentation'][1];
137-
138-
$separator = $match['separator'][0];
139-
$separatorStart = $match['separator'][1];
140-
141-
if ($indentation === '' && $separator !== '') {
142-
// Ordinary heredoc/nowdoc
143-
continue;
144-
}
145-
146-
if ($indentation !== '') {
147-
// Remove indentation
148-
$indentationLen = strlen($indentation);
149-
$code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
150-
$this->patches[] = [$indentationStart + $posDelta, 'add', $indentation];
151-
$posDelta -= $indentationLen;
152-
}
153-
154-
if ($separator === '') {
155-
// Insert newline as separator
156-
$code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
157-
$this->patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
158-
$posDelta += 1;
159-
}
160-
}
161-
162-
return $code;
163-
}
164-
165-
private function isEmulationNeeded(string $code): bool
166-
{
167-
foreach ($this->emulators as $emulator) {
168-
if ($emulator->isEmulationNeeded($code)) {
169-
return true;
170-
}
171-
}
172-
173-
return $this->isHeredocNowdocEmulationNeeded($code);
174-
}
175-
176109
private function fixupTokens()
177110
{
178111
if (\count($this->patches) === 0) {

lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
use PhpParser\Lexer\Emulative;
66

7-
final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
7+
final class CoaleseEqualTokenEmulator extends TokenEmulator
88
{
99
public function getPhpVersion(): string
1010
{

lib/PhpParser/Lexer/TokenEmulator/FlexibleDocStringEmulator.php

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace PhpParser\Lexer\TokenEmulator;
4+
5+
use PhpParser\Lexer\Emulative;
6+
7+
final class FlexibleDocStringEmulator extends TokenEmulator
8+
{
9+
const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
10+
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
11+
(?:.*\r?\n)*?
12+
(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
13+
REGEX;
14+
15+
public function getPhpVersion(): string
16+
{
17+
return Emulative::PHP_7_3;
18+
}
19+
20+
public function isEmulationNeeded(string $code) : bool
21+
{
22+
return strpos($code, '<<<') !== false;
23+
}
24+
25+
public function emulate(string $code, array $tokens): array
26+
{
27+
// Handled by preprocessing + fixup.
28+
return $tokens;
29+
}
30+
31+
public function reverseEmulate(string $code, array $tokens): array
32+
{
33+
// Not supported.
34+
return $tokens;
35+
}
36+
37+
public function preprocessCode(string $code, array &$patches): string {
38+
if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
39+
// No heredoc/nowdoc found
40+
return $code;
41+
}
42+
43+
// Keep track of how much we need to adjust string offsets due to the modifications we
44+
// already made
45+
$posDelta = 0;
46+
foreach ($matches as $match) {
47+
$indentation = $match['indentation'][0];
48+
$indentationStart = $match['indentation'][1];
49+
50+
$separator = $match['separator'][0];
51+
$separatorStart = $match['separator'][1];
52+
53+
if ($indentation === '' && $separator !== '') {
54+
// Ordinary heredoc/nowdoc
55+
continue;
56+
}
57+
58+
if ($indentation !== '') {
59+
// Remove indentation
60+
$indentationLen = strlen($indentation);
61+
$code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
62+
$patches[] = [$indentationStart + $posDelta, 'add', $indentation];
63+
$posDelta -= $indentationLen;
64+
}
65+
66+
if ($separator === '') {
67+
// Insert newline as separator
68+
$code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
69+
$patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
70+
$posDelta += 1;
71+
}
72+
}
73+
74+
return $code;
75+
}
76+
}

lib/PhpParser/Lexer/TokenEmulator/KeywordEmulator.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
namespace PhpParser\Lexer\TokenEmulator;
44

5-
abstract class KeywordEmulator implements TokenEmulatorInterface
5+
abstract class KeywordEmulator extends TokenEmulator
66
{
77
abstract function getKeywordString(): string;
88
abstract function getKeywordToken(): int;

lib/PhpParser/Lexer/TokenEmulator/NullsafeTokenEmulator.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
use PhpParser\Lexer\Emulative;
66

7-
final class NullsafeTokenEmulator implements TokenEmulatorInterface
7+
final class NullsafeTokenEmulator extends TokenEmulator
88
{
99
public function getPhpVersion(): string
1010
{

lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
use PhpParser\Lexer\Emulative;
66

7-
final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
7+
final class NumericLiteralSeparatorEmulator extends TokenEmulator
88
{
99
const BIN = '(?:0b[01]+(?:_[01]+)*)';
1010
const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';

lib/PhpParser/Lexer/TokenEmulator/ReverseEmulator.php

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
/**
66
* Reverses emulation direction of the inner emulator.
77
*/
8-
final class ReverseEmulator implements TokenEmulatorInterface
8+
final class ReverseEmulator extends TokenEmulator
99
{
10-
/** @var TokenEmulatorInterface Inner emulator */
10+
/** @var TokenEmulator Inner emulator */
1111
private $emulator;
1212

13-
public function __construct(TokenEmulatorInterface $emulator) {
13+
public function __construct(TokenEmulator $emulator) {
1414
$this->emulator = $emulator;
1515
}
1616

@@ -29,4 +29,8 @@ public function emulate(string $code, array $tokens): array {
2929
public function reverseEmulate(string $code, array $tokens): array {
3030
return $this->emulator->emulate($code, $tokens);
3131
}
32+
33+
public function preprocessCode(string $code, array &$patches): string {
34+
return $code;
35+
}
3236
}

lib/PhpParser/Lexer/TokenEmulator/TokenEmulator.php

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace PhpParser\Lexer\TokenEmulator;
4+
5+
/** @internal */
6+
abstract class TokenEmulator
7+
{
8+
abstract public function getPhpVersion(): string;
9+
10+
abstract public function isEmulationNeeded(string $code): bool;
11+
12+
/**
13+
* @return array Modified Tokens
14+
*/
15+
abstract public function emulate(string $code, array $tokens): array;
16+
17+
/**
18+
* @return array Modified Tokens
19+
*/
20+
abstract public function reverseEmulate(string $code, array $tokens): array;
21+
22+
public function preprocessCode(string $code, array &$patches): string {
23+
return $code;
24+
}
25+
}

lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php

Lines changed: 0 additions & 21 deletions
This file was deleted.

0 commit comments

Comments
 (0)