Skip to content

Commit 1504e60

Browse files
committed
migrated to swift 3
1 parent 40f667c commit 1504e60

File tree

9 files changed

+228
-685
lines changed

9 files changed

+228
-685
lines changed

Run-Length Encoding/README.markdown

Lines changed: 62 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
RLE is probably the simplest way to do compression. Let's say you have data that looks like this:
44

55
aaaaabbbcdeeeeeeef...
6-
6+
77
then RLE encodes it as follows:
88

99
5a3b1c1d7e1f...
1010

1111
Instead of writing out each repeated byte, you first write how often the byte occurs and then the byte's actual value. So `5a` means `aaaaa`. If the data has a lot of "byte runs" (that is, long stretches of the same byte), then RLE can save quite a bit of space. It works quite well on images.
1212

13-
There are many different ways you can implement RLE. Here's an extension of `NSData` that does a version of RLE inspired by the old [PCX image file format](https://en.wikipedia.org/wiki/PCX).
13+
There are many different ways you can implement RLE. Here's an extension of `Data` that does a version of RLE inspired by the old [PCX image file format](https://en.wikipedia.org/wiki/PCX).
1414

1515
The rules are these:
1616

@@ -20,44 +20,42 @@ The rules are these:
2020

2121
- A single byte in the range 192 - 255 is represented by two bytes: first the byte 192 (meaning a run of 1 byte), followed by the actual value.
2222

23-
Here is the compression code. It returns a new `NSData` object containing the run-length encoded bytes:
23+
Here is the compression code. It returns a new `Data` object containing the run-length encoded bytes:
2424

2525
```swift
26-
extension NSData {
27-
public func compressRLE() -> NSData {
28-
let data = NSMutableData()
29-
if length > 0 {
30-
var ptr = UnsafePointer<UInt8>(bytes)
31-
let end = ptr + length
32-
33-
while ptr < end { // 1
34-
var count = 0
35-
var byte = ptr.memory
36-
var next = byte
37-
38-
while next == byte && ptr < end && count < 64 { // 2
39-
ptr = ptr.advancedBy(1)
40-
next = ptr.memory
41-
count += 1
42-
}
43-
44-
if count > 1 || byte >= 192 { // 3
45-
var size = 191 + UInt8(count)
46-
data.appendBytes(&size, length: 1)
47-
data.appendBytes(&byte, length: 1)
48-
} else { // 4
49-
data.appendBytes(&byte, length: 1)
26+
extension Data {
  /// Compresses the receiver using a PCX-inspired run-length encoding.
  ///
  /// A run of 1 to 64 equal bytes is written as a marker byte
  /// (`191 + run length`, so 192...255) followed by the byte value. A lone
  /// byte below 192 is copied as-is; a lone byte of 192 or more is written as
  /// a run of length 1 so that it cannot be mistaken for a marker.
  ///
  /// - Returns: A new `Data` value containing the run-length encoded bytes.
  public func compressRLE() -> Data {
    var data = Data()
    self.withUnsafeBytes { (uPtr: UnsafePointer<UInt8>) in
      var ptr = uPtr
      let end = ptr + count
      while ptr < end {                                              // 1
        var byte = ptr.pointee
        var runLength = 0

        // Test `ptr < end` before dereferencing, so the byte one past the end
        // of the buffer is never read when a run reaches the end of the data.
        while ptr < end && ptr.pointee == byte && runLength < 64 {   // 2
          ptr = ptr.advanced(by: 1)
          runLength += 1
        }

        if runLength > 1 || byte >= 192 {                            // 3
          // runLength is 1...64 here, so the marker fits in 192...255.
          var size = 191 + UInt8(runLength)
          data.append(&size, count: 1)
          data.append(&byte, count: 1)
        } else {                                                     // 4
          data.append(&byte, count: 1)
        }
      }
    }
    return data
  }
}
53-
return data
54-
}
55-
}
5654
```
5755

5856
How it works:
5957

60-
1. We use an `UnsafePointer` to step through the bytes of the original `NSData` object.
58+
1. We use an `UnsafePointer` to step through the bytes of the original `Data` object.
6159

6260
2. At this point we've read the current byte value into the `byte` variable. If the next byte is the same, then we keep reading until we find a byte value that is different, or we reach the end of the data. We also stop if the run is 64 bytes because that's the maximum we can encode.
6361

@@ -69,11 +67,11 @@ You can test it like this in a playground:
6967

7068
```swift
7169
let originalString = "aaaaabbbcdeeeeeeef"
72-
let utf8 = originalString.dataUsingEncoding(NSUTF8StringEncoding)!
70+
let utf8 = originalString.data(using: String.Encoding.utf8)!
7371
let compressed = utf8.compressRLE()
7472
```
7573

76-
The compressed `NSData` object should be `<c461c262 6364c665 66>`. Let's decode that by hand to see what has happened:
74+
The compressed `Data` object should contain the 9 bytes `c461c262 6364c665 66` (in hexadecimal). Let's decode that by hand to see what has happened:
7775

7876
c4 This is 196 in decimal. It means the next byte appears 5 times.
7977
61 The data byte "a".
@@ -90,34 +88,38 @@ So that's 9 bytes encoded versus 18 original. That's a savings of 50%. Of course
9088
Here is the decompression code:
9189

9290
```swift
93-
public func decompressRLE() -> NSData {
94-
let data = NSMutableData()
95-
if length > 0 {
96-
var ptr = UnsafePointer<UInt8>(bytes)
97-
let end = ptr + length
98-
99-
while ptr < end {
100-
var byte = ptr.memory // 1
101-
ptr = ptr.advancedBy(1)
102-
103-
if byte < 192 { // 2
104-
data.appendBytes(&byte, length: 1)
105-
106-
} else if ptr < end { // 3
107-
var value = ptr.memory
108-
ptr = ptr.advancedBy(1)
109-
110-
for _ in 0 ..< byte - 191 {
111-
data.appendBytes(&value, length: 1)
112-
}
91+
/// Expands run-length encoded bytes back into the original data.
///
/// Bytes below 192 are copied unchanged. A byte of 192 or more is a run
/// marker: the byte that follows it is written `marker - 191` times. A marker
/// that is the very last byte of the input has no value to repeat and is
/// ignored.
///
/// - Returns: A new `Data` value holding the decoded bytes.
public func decompressRLE() -> Data {
  var output = Data()
  self.withUnsafeBytes { (start: UnsafePointer<UInt8>) in
    let end = start + count
    var cursor = start

    while cursor < end {
      // Either a literal value below 192, or the marker that starts a run.
      var marker = cursor.pointee                    // 1
      cursor = cursor.advanced(by: 1)

      guard marker >= 192 else {                     // 2
        output.append(&marker, count: 1)
        continue
      }

      if cursor < end {                              // 3
        // The byte after the marker is the value to repeat.
        var value = cursor.pointee
        cursor = cursor.advanced(by: 1)

        // Write it out once per repetition encoded in the marker.
        var remaining = marker - 191
        while remaining > 0 {
          output.append(&value, count: 1)
          remaining -= 1
        }
      }
    }
  }
  return output
}
116-
return data
117-
}
119+
118120
```
119121

120-
1. Again this uses an `UnsafePointer` to read the `NSData`. Here we read the next byte; this is either a single value less than 192, or the start of a byte run.
122+
1. Again this uses an `UnsafePointer` to read the `Data`. Here we read the next byte; this is either a single value less than 192, or the start of a byte run.
121123

122124
2. If it's a single value, then it's just a matter of copying it to the output.
123125

@@ -134,6 +136,7 @@ And now `originalString == restoredString` must be true!
134136

135137
Footnote: The original PCX implementation is slightly different. There, a byte value of 192 (0xC0) means that the following byte will be repeated 0 times. This also limits the maximum run size to 63 bytes. Because it makes no sense to store bytes that don't occur, in my implementation 192 means the next byte appears once, and the maximum run length is 64 bytes.
136138

137-
This was probably a trade-off when they designed the PCX format way back when. If you look at it in binary, the upper two bits indicate whether a byte is compressed. (If both bits are set then the byte value is 192 or more.) To get the run length you can simply do `byte & 0x3F`, giving you a value in the range 0 to 63.
139+
This was probably a trade-off when they designed the PCX format way back when. If you look at it in binary, the upper two bits indicate whether a byte is compressed. (If both bits are set then the byte value is 192 or more.) To get the run length you can simply do `byte & 0x3F`, giving you a value in the range 0 to 63.
138140

139141
*Written for Swift Algorithm Club by Matthijs Hollemans*
142+
*Migrated to Swift 3 by Jaap Wijnen*

Run-Length Encoding/RLE.playground/Contents.swift

Lines changed: 98 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2,81 +2,115 @@
22

33
import Foundation
44

5-
extension NSData {
6-
/*
7-
Compresses the NSData using run-length encoding.
8-
*/
9-
public func compressRLE() -> NSData {
10-
let data = NSMutableData()
11-
if length > 0 {
12-
var ptr = UnsafePointer<UInt8>(bytes)
13-
let end = ptr + length
5+
// Compress a short sample string that contains several byte runs.
let originalString = "aaaaabbbcdeeeeeeef"
let utf8 = originalString.data(using: .utf8)!
let compressed = utf8.compressRLE()
148

15-
while ptr < end {
16-
var count = 0
17-
var byte = ptr.memory
18-
var next = byte
9+
// Decompress again and check that the round trip restores the original text.
let decompressed = compressed.decompressRLE()
let restoredString = String(data: decompressed, encoding: .utf8)
originalString == restoredString
1912

20-
// Is the next byte the same? Keep reading until we find a different
21-
// value, or we reach the end of the data, or the run is 64 bytes.
22-
while next == byte && ptr < end && count < 64 {
23-
ptr = ptr.advancedBy(1)
24-
next = ptr.memory
25-
count += 1
26-
}
13+
/// Round-trips `bytes` through `compressRLE()` and `decompressRLE()`.
///
/// Prints the size of the original, the encoded and the decoded data along
/// the way.
///
/// - Parameter bytes: The raw bytes to encode and then decode again.
/// - Returns: `true` if the decoded data equals the original data.
func encodeAndDecode(_ bytes: [UInt8]) -> Bool {
  // Build the data straight from the array; no mutable copy or pointer is
  // needed, and none of the values below are ever mutated.
  let data1 = Data(bytes)
  print("data1 is \(data1.count) bytes")

  let rleData = data1.compressRLE()
  print("encoded data is \(rleData.count) bytes")

  let data2 = rleData.decompressRLE()
  print("data2 is \(data2.count) bytes")

  return data1 == data2
}
2727

28-
if count > 1 || byte >= 192 { // byte run of up to 64 repeats
29-
var size = 191 + UInt8(count)
30-
data.appendBytes(&size, length: 1)
31-
data.appendBytes(&byte, length: 1)
32-
} else { // single byte between 0 and 192
33-
data.appendBytes(&byte, length: 1)
34-
}
35-
}
36-
}
37-
return data
38-
}
28+
/// Round-trips an empty buffer.
func testEmpty() -> Bool {
  return encodeAndDecode([])
}
3932

40-
/*
41-
Converts a run-length encoded NSData back to the original.
42-
*/
43-
public func decompressRLE() -> NSData {
44-
let data = NSMutableData()
45-
if length > 0 {
46-
var ptr = UnsafePointer<UInt8>(bytes)
47-
let end = ptr + length
33+
/// Round-trips a single byte whose value is below 192.
func testOneByteWithLowValue() -> Bool {
  return encodeAndDecode([0x80])
}
4837

49-
while ptr < end {
50-
// Read the next byte. This is either a single value less than 192,
51-
// or the start of a byte run.
52-
var byte = ptr.memory
53-
ptr = ptr.advancedBy(1)
38+
/// Round-trips a single byte whose value is 192 or above.
func testOneByteWithHighValue() -> Bool {
  return encodeAndDecode([0xD0])
}
5442

55-
if byte < 192 { // single value
56-
data.appendBytes(&byte, length: 1)
43+
/// Round-trips a small buffer mixing lone bytes and short runs, with values
/// both below and above 192.
func testSimpleCases() -> Bool {
  var bytes: [UInt8] = [0x00]                    // lone low byte
  bytes += [UInt8](repeating: 0x20, count: 5)    // run of five low bytes
  bytes += [0x30]                                // lone low byte
  bytes += [0x00, 0x00]                          // run of two low bytes
  bytes += [0xC0, 0xC1]                          // two different lone high bytes
  bytes += [UInt8](repeating: 0xC0, count: 3)    // run of three high bytes
  bytes += [UInt8](repeating: 0xFF, count: 4)    // run of four high bytes
  return encodeAndDecode(bytes)
}
5756

58-
} else if ptr < end { // byte run
59-
// Read the actual data value.
60-
var value = ptr.memory
61-
ptr = ptr.advancedBy(1)
57+
/// Round-trips 1024 bytes that cycle through 0...255.
func testBufferWithoutSpans() -> Bool {
  // No byte equals its neighbour, so there are no runs to encode and the
  // encoded data ends up being longer than the input.
  let bytes = (0..<1024).map { UInt8($0 % 256) }
  return encodeAndDecode(bytes)
}
6266

63-
// And write it out repeatedly.
64-
for _ in 0 ..< byte - 191 {
65-
data.appendBytes(&value, length: 1)
66-
}
67+
/// Round-trips a buffer made of 32 consecutive runs, each `spanSize` bytes
/// long.
///
/// - Parameter spanSize: The number of equal bytes in each run.
func testBufferWithSpans(_ spanSize: Int) -> Bool {
  print("span size \(spanSize)")

  let total = spanSize * 32
  var buffer = [UInt8](repeating: 0, count: total)

  for start in stride(from: 0, to: total, by: spanSize) {
    // Every byte of a span holds the span's start offset, modulo 256.
    let fill = UInt8(start % 256)
    for index in start ..< start + spanSize {
      buffer[index] = fill
    }
  }
  return encodeAndDecode(buffer)
}
7380

81+
/// Round-trips a buffer of 1 to 2048 random bytes.
func testRandomByte() -> Bool {
  let length = 1 + Int(arc4random_uniform(2048))
  let bytes = (0..<length).map { _ in UInt8(arc4random() % 256) }
  return encodeAndDecode(bytes)
}
7489

90+
/// Runs every round-trip test, followed by ten random-buffer tests.
///
/// - Returns: `true` only if all of the tests passed.
func runTests() -> Bool {
  let spanSizes = [4, 63, 64, 65, 66, 80]

  var outcomes = [
    testEmpty(),
    testOneByteWithLowValue(),
    testOneByteWithHighValue(),
    testSimpleCases(),
    testBufferWithoutSpans()
  ]
  outcomes += spanSizes.map { testBufferWithSpans($0) }
  outcomes += (0..<10).map { _ in testRandomByte() }

  // Every test has already run at this point; just combine the results.
  return !outcomes.contains(false)
}
75115

76-
let originalString = "aaaaabbbcdeeeeeeef"
77-
let utf8 = originalString.dataUsingEncoding(NSUTF8StringEncoding)!
78-
let compressed = utf8.compressRLE()
79-
80-
let decompressed = compressed.decompressRLE()
81-
let restoredString = String(data: decompressed, encoding: NSUTF8StringEncoding)
82-
originalString == restoredString
116+
// Run all of the round-trip tests; the call evaluates to their combined result.
runTests()

0 commit comments

Comments
 (0)