Skip to content

Commit 3e4a30b

Browse files
1 parent 0df676e commit 3e4a30b

4 files changed

+326
-65
lines changed

src/Differ.php

Lines changed: 19 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444

4545
namespace SebastianBergmann\Diff;
4646

47+
use SebastianBergmann\Diff\LCS\LongestCommonSubsequence;
48+
use SebastianBergmann\Diff\LCS\TimeEfficientImplementation;
49+
4750
/**
4851
* Diff implementation.
4952
*
@@ -62,8 +65,6 @@ class Differ
6265
private $header;
6366

6467
/**
65-
* Constructor
66-
*
6768
* @param string $header
6869
*/
6970
public function __construct($header = "--- Original\n+++ New\n")
@@ -74,14 +75,15 @@ public function __construct($header = "--- Original\n+++ New\n")
7475
/**
7576
* Returns the diff between two arrays or strings as string.
7677
*
77-
* @param array|string $from
78-
* @param array|string $to
78+
* @param array|string $from
79+
* @param array|string $to
80+
* @param LongestCommonSubsequence $lcs
7981
* @return string
8082
*/
81-
public function diff($from, $to)
83+
public function diff($from, $to, LongestCommonSubsequence $lcs = null)
8284
{
8385
$buffer = $this->header;
84-
$diff = $this->diffToArray($from, $to);
86+
$diff = $this->diffToArray($from, $to, $lcs);
8587

8688
$inOld = false;
8789
$i = 0;
@@ -147,12 +149,18 @@ public function diff($from, $to)
147149
* - 1: ADDED: $token was added to $from
148150
* - 0: OLD: $token is not changed in $to
149151
*
150-
* @param array|string $from
151-
* @param array|string $to
152+
* @param array|string $from
153+
* @param array|string $to
154+
* @param LongestCommonSubsequence $lcs
152155
* @return array
153156
*/
154-
public function diffToArray($from, $to)
157+
public function diffToArray($from, $to, LongestCommonSubsequence $lcs = null)
155158
{
159+
if ($lcs === null) {
160+
// @todo Automagically choose best strategy based on input size
161+
$lcs = new TimeEfficientImplementation;
162+
}
163+
156164
preg_match_all('(\r\n|\r|\n)', $from, $fromMatches);
157165
preg_match_all('(\r\n|\r|\n)', $to, $toMatches);
158166

@@ -190,12 +198,8 @@ public function diffToArray($from, $to)
190198
}
191199
}
192200

193-
$common = $this->longestCommonSubsequence(
194-
array_values($from),
195-
array_values($to)
196-
);
197-
198-
$diff = array();
201+
$common = $lcs->calculate(array_values($from), array_values($to));
202+
$diff = array();
199203

200204
if (isset($fromMatches[0]) && $toMatches[0] &&
201205
count($fromMatches[0]) === count($toMatches[0]) &&
@@ -241,54 +245,4 @@ public function diffToArray($from, $to)
241245

242246
return $diff;
243247
}
244-
245-
/**
246-
* Calculates the longest common subsequence of two arrays.
247-
*
248-
* @param array $from
249-
* @param array $to
250-
* @return array
251-
*/
252-
private function longestCommonSubsequence(array $from, array $to)
253-
{
254-
$common = array();
255-
$matrix = array();
256-
$fromLength = count($from);
257-
$toLength = count($to);
258-
259-
for ($i = 0; $i <= $fromLength; ++$i) {
260-
$matrix[$i][0] = 0;
261-
}
262-
263-
for ($j = 0; $j <= $toLength; ++$j) {
264-
$matrix[0][$j] = 0;
265-
}
266-
267-
for ($i = 1; $i <= $fromLength; ++$i) {
268-
for ($j = 1; $j <= $toLength; ++$j) {
269-
$matrix[$i][$j] = max(
270-
$matrix[$i-1][$j],
271-
$matrix[$i][$j-1],
272-
$from[$i-1] === $to[$j-1] ? $matrix[$i-1][$j-1] + 1 : 0
273-
);
274-
}
275-
}
276-
277-
$i = $fromLength;
278-
$j = $toLength;
279-
280-
while ($i > 0 && $j > 0) {
281-
if ($from[$i-1] === $to[$j-1]) {
282-
array_unshift($common, $from[$i-1]);
283-
--$i;
284-
--$j;
285-
} elseif ($matrix[$i][$j-1] > $matrix[$i-1][$j]) {
286-
--$j;
287-
} else {
288-
--$i;
289-
}
290-
}
291-
292-
return $common;
293-
}
294248
}

src/LCS/LongestCommonSubsequence.php

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
<?php
2+
/**
3+
* Diff
4+
*
5+
* Copyright (c) 2001-2014, Sebastian Bergmann <[email protected]>.
6+
* All rights reserved.
7+
*
8+
* Redistribution and use in source and binary forms, with or without
9+
* modification, are permitted provided that the following conditions
10+
* are met:
11+
*
12+
* * Redistributions of source code must retain the above copyright
13+
* notice, this list of conditions and the following disclaimer.
14+
*
15+
* * Redistributions in binary form must reproduce the above copyright
16+
* notice, this list of conditions and the following disclaimer in
17+
* the documentation and/or other materials provided with the
18+
* distribution.
19+
*
20+
* * Neither the name of Sebastian Bergmann nor the names of his
21+
* contributors may be used to endorse or promote products derived
22+
* from this software without specific prior written permission.
23+
*
24+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27+
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28+
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30+
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33+
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34+
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35+
* POSSIBILITY OF SUCH DAMAGE.
36+
*
37+
* @package Diff
38+
* @author Sebastian Bergmann <[email protected]>
39+
* @author Kore Nordmann <[email protected]>
40+
* @copyright 2001-2014 Sebastian Bergmann <[email protected]>
41+
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
42+
* @link http://www.github.com/sebastianbergmann/diff
43+
*/
44+
45+
namespace SebastianBergmann\Diff\LCS;
46+
47+
/**
48+
* Interface for implementations of longest common subsequence calculation.
49+
*
50+
* @package Diff
51+
* @author Sebastian Bergmann <[email protected]>
52+
* @author Kore Nordmann <[email protected]>
53+
* @copyright 2001-2014 Sebastian Bergmann <[email protected]>
54+
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
55+
* @link http://www.github.com/sebastianbergmann/diff
56+
*/
57+
interface LongestCommonSubsequence
58+
{
59+
/**
60+
* Calculates the longest common subsequence of two arrays.
61+
*
* This is the strategy hook used by the Differ class: Differ::diffToArray()
* accepts an optional instance of this interface and invokes calculate()
* with array_values() applied to both of its inputs, so implementations
* are called with consecutively indexed lists from that call site.
*
62+
* @param array $from
63+
* @param array $to
64+
* @return array
65+
*/
66+
public function calculate(array $from, array $to);
67+
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
<?php
2+
/**
3+
* Diff
4+
*
5+
* Copyright (c) 2001-2014, Sebastian Bergmann <[email protected]>.
6+
* All rights reserved.
7+
*
8+
* Redistribution and use in source and binary forms, with or without
9+
* modification, are permitted provided that the following conditions
10+
* are met:
11+
*
12+
* * Redistributions of source code must retain the above copyright
13+
* notice, this list of conditions and the following disclaimer.
14+
*
15+
* * Redistributions in binary form must reproduce the above copyright
16+
* notice, this list of conditions and the following disclaimer in
17+
* the documentation and/or other materials provided with the
18+
* distribution.
19+
*
20+
* * Neither the name of Sebastian Bergmann nor the names of his
21+
* contributors may be used to endorse or promote products derived
22+
* from this software without specific prior written permission.
23+
*
24+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27+
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28+
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30+
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33+
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34+
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35+
* POSSIBILITY OF SUCH DAMAGE.
36+
*
37+
* @package Diff
38+
* @author Sebastian Bergmann <[email protected]>
39+
* @author Kore Nordmann <[email protected]>
40+
* @copyright 2001-2014 Sebastian Bergmann <[email protected]>
41+
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
42+
* @link http://www.github.com/sebastianbergmann/diff
43+
*/
44+
45+
namespace SebastianBergmann\Diff\LCS;
46+
47+
/**
48+
* Memory-efficient implementation of longest common subsequence calculation.
49+
*
50+
* @package Diff
51+
* @author Sebastian Bergmann <[email protected]>
52+
* @author Denes Lados <[email protected]>
53+
* @copyright 2001-2014 Sebastian Bergmann <[email protected]>
54+
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
55+
* @link http://www.github.com/sebastianbergmann/diff
56+
*/
57+
class MemoryEfficientImplementation implements LongestCommonSubsequence
{
    /**
     * Calculates the longest common subsequence of two arrays.
     *
     * The calculation is recursive: $from is cut in half, the best position
     * at which to cut $to is determined from two length rows (one computed
     * forwards for the first half, one computed on the reversed inputs for
     * the second half), and the two resulting sub-problems are solved
     * independently and concatenated.
     *
     * Elements are compared strictly (===), so that distinct strings that
     * merely look like equal numbers (for example "1e1" and "10") are never
     * reported as common.
     *
     * Both arrays are expected to be consecutively indexed starting at 0
     * (the Differ class passes them through array_values() before calling).
     *
     * @param  array $from
     * @param  array $to
     * @return array The common elements, in the order they appear in $from
     */
    public function calculate(array $from, array $to)
    {
        $cFrom = count($from);
        $cTo   = count($to);

        // Nothing can be common if either side is empty.
        if ($cFrom === 0 || $cTo === 0) {
            return array();
        }

        // Base case of the recursion: a single element is either present
        // in $to (strict comparison) or it is not.
        if ($cFrom === 1) {
            if (in_array($from[0], $to, true)) {
                return array($from[0]);
            }

            return array();
        }

        $i         = intval($cFrom / 2);
        $fromStart = array_slice($from, 0, $i);
        $fromEnd   = array_slice($from, $i);

        // $llB[$j]: LCS length of $fromStart and the first $j elements of $to.
        // $llE[$k]: LCS length of $fromEnd and the last $k elements of $to.
        $llB = $this->length($fromStart, $to);
        $llE = $this->length(array_reverse($fromEnd), array_reverse($to));

        $jMax = 0;
        $max  = 0;

        // Pick the split point of $to that maximises the combined length.
        // ">=" keeps the right-most split point among equally good ones.
        for ($j = 0; $j <= $cTo; $j++) {
            $m = $llB[$j] + $llE[$cTo - $j];

            if ($m >= $max) {
                $max  = $m;
                $jMax = $j;
            }
        }

        $toStart = array_slice($to, 0, $jMax);
        $toEnd   = array_slice($to, $jMax);

        return array_merge(
            $this->calculate($fromStart, $toStart),
            $this->calculate($fromEnd, $toEnd)
        );
    }

    /**
     * Computes the last row of the LCS length table for $from and $to.
     *
     * Only the current and the previous row are kept while iterating over
     * $from. In the returned array, the entry at index $j is the length of
     * the longest common subsequence of $from and the first $j elements of
     * $to; the array therefore has count($to) + 1 entries.
     *
     * @param  array $from
     * @param  array $to
     * @return array
     */
    private function length(array $from, array $to)
    {
        $cFrom   = count($from);
        $cTo     = count($to);
        $current = array_fill(0, $cTo + 1, 0);

        for ($i = 0; $i < $cFrom; $i++) {
            $prev = $current;

            for ($j = 0; $j < $cTo; $j++) {
                // Strict comparison, consistent with calculate().
                if ($from[$i] === $to[$j]) {
                    $current[$j + 1] = $prev[$j] + 1;
                } else {
                    $current[$j + 1] = max($current[$j], $prev[$j + 1]);
                }
            }
        }

        return $current;
    }
}

0 commit comments

Comments
 (0)