Huffman Coding API in Swift
up vote
0
down vote
favorite
This question is a follow up question to: this post
I'd love some advice on my implementation and the API of the Huffman
class.
I'm also not sure how to test if my implementation is actually resulting in less bytes than a string. It seems that encoded.count
(as a Data) is larger than word.utf8.count
(as a String). Maybe I'm just not testing on large enough strings?
Also any thoughts on HuffData
storing the code
ex. ["00","01"]
and the frequencyTable
instead of the tree.
Here’s an example of how the API is used:
let word = "MISSISSIPPI_RIVER!"
let encoded = try? Huffman.encode(word)
let decoded = try? Huffman.decode(encoded!)
XCTAssertEqual(decoded, word)
Here's the code:
import Foundation
struct HuffData: Codable {
var code: [String]
var frequencyTable: [String: String]
}
class Huffman {
static func decode(_ data: Data) throws -> String {
do {
let huff = try JSONDecoder().decode(HuffData.self, from: data)
let reverseTable = Dictionary(uniqueKeysWithValues: zip(huff.frequencyTable.values, huff.frequencyTable.keys))
return huff.code.compactMap({ reverseTable[$0]}).joined()
}
catch let error {
throw error
}
}
static func encode(_ input: String) throws -> Data {
let frequencyTable = Huffman.buildFrequencyTable(for: input)
let code = input.compactMap({frequencyTable[String($0)]})
let huff = HuffData(code: code, frequencyTable: frequencyTable)
do {
let data = try JSONEncoder().encode(huff)
return data
}
catch let error {
throw error
}
}
static private func buildFrequencyTable(for input: String) -> [String: String] {
// count letter frequency
let sortedFrequency = input.reduce(into: [String: Int](), { freq, char in
freq[String(char), default: 0] += 1
})
// create queue of initial Nodes
let queue = sortedFrequency.map{ Node(name: $0.key, value: $0.value)}
// generate key by traversing tree
return Huffman.generateKey(for: Huffman.createTree(with: queue), prefix: "")
}
static private func generateKey(for node: Node, prefix: String) -> [String: String] {
var key = [String: String]()
if let left = node.left, let right = node.right {
key.merge(generateKey(for: left, prefix: prefix + "0"), uniquingKeysWith: {current,_ in current})
key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: {current,_ in current})
}else {
key[node.name] = prefix
}
return key
}
static private func createTree(with queue: [Node]) -> Node {
// initialize queue that sorts by decreasing count
var queue = PriorityQueue(queue: queue)
// until we have 1 root node, join subtrees of least frequency
while queue.count > 1 {
let node1 = queue.dequeue()
let node2 = queue.dequeue()
let rootNode = Huffman.createRoot(with: node1, and: node2)
queue.enqueue(node: rootNode)
}
return queue.queue[0]
}
static private func createRoot(with first: Node, and second: Node) -> Node {
return Node(name: "(first.name)(second.name)", value: first.value + second.value, left: first, right: second)
}
}
struct PriorityQueue {
var queue: [Node]
var count: Int {
return queue.count
}
mutating func enqueue(node: Node) {
queue.insert(node, at: queue.index(where: {$0.value <= node.value}) ?? 0)
}
mutating func dequeue() -> Node {
return queue.removeLast()
}
init(queue: [Node]){
// assumes queue will always be sorted by decreasing count
self.queue = queue.sorted(by: {$0.value > $1.value})
}
}
class Node: CustomStringConvertible {
var description: String {
return "(name): (value)"
}
let name: String
let value: Int
let left: Node?
let right: Node?
init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {
self.name = name
self.value = value
self.left = left
self.right = right
}
}
algorithm swift compression
add a comment |
up vote
0
down vote
favorite
This question is a follow up question to: this post
I'd love some advice on my implementation and the API of the Huffman
class.
I'm also not sure how to test if my implementation is actually resulting in less bytes than a string. It seems that encoded.count
(as a Data) is larger than word.utf8.count
(as a String). Maybe I'm just not testing on large enough strings?
Also any thoughts on HuffData
storing the code
ex. ["00","01"]
and the frequencyTable
instead of the tree.
Here’s an example of how the API is used:
let word = "MISSISSIPPI_RIVER!"
let encoded = try? Huffman.encode(word)
let decoded = try? Huffman.decode(encoded!)
XCTAssertEqual(decoded, word)
Here's the code:
import Foundation
struct HuffData: Codable {
var code: [String]
var frequencyTable: [String: String]
}
class Huffman {
static func decode(_ data: Data) throws -> String {
do {
let huff = try JSONDecoder().decode(HuffData.self, from: data)
let reverseTable = Dictionary(uniqueKeysWithValues: zip(huff.frequencyTable.values, huff.frequencyTable.keys))
return huff.code.compactMap({ reverseTable[$0]}).joined()
}
catch let error {
throw error
}
}
static func encode(_ input: String) throws -> Data {
let frequencyTable = Huffman.buildFrequencyTable(for: input)
let code = input.compactMap({frequencyTable[String($0)]})
let huff = HuffData(code: code, frequencyTable: frequencyTable)
do {
let data = try JSONEncoder().encode(huff)
return data
}
catch let error {
throw error
}
}
static private func buildFrequencyTable(for input: String) -> [String: String] {
// count letter frequency
let sortedFrequency = input.reduce(into: [String: Int](), { freq, char in
freq[String(char), default: 0] += 1
})
// create queue of initial Nodes
let queue = sortedFrequency.map{ Node(name: $0.key, value: $0.value)}
// generate key by traversing tree
return Huffman.generateKey(for: Huffman.createTree(with: queue), prefix: "")
}
static private func generateKey(for node: Node, prefix: String) -> [String: String] {
var key = [String: String]()
if let left = node.left, let right = node.right {
key.merge(generateKey(for: left, prefix: prefix + "0"), uniquingKeysWith: {current,_ in current})
key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: {current,_ in current})
}else {
key[node.name] = prefix
}
return key
}
static private func createTree(with queue: [Node]) -> Node {
// initialize queue that sorts by decreasing count
var queue = PriorityQueue(queue: queue)
// until we have 1 root node, join subtrees of least frequency
while queue.count > 1 {
let node1 = queue.dequeue()
let node2 = queue.dequeue()
let rootNode = Huffman.createRoot(with: node1, and: node2)
queue.enqueue(node: rootNode)
}
return queue.queue[0]
}
static private func createRoot(with first: Node, and second: Node) -> Node {
return Node(name: "(first.name)(second.name)", value: first.value + second.value, left: first, right: second)
}
}
struct PriorityQueue {
var queue: [Node]
var count: Int {
return queue.count
}
mutating func enqueue(node: Node) {
queue.insert(node, at: queue.index(where: {$0.value <= node.value}) ?? 0)
}
mutating func dequeue() -> Node {
return queue.removeLast()
}
init(queue: [Node]){
// assumes queue will always be sorted by decreasing count
self.queue = queue.sorted(by: {$0.value > $1.value})
}
}
class Node: CustomStringConvertible {
var description: String {
return "(name): (value)"
}
let name: String
let value: Int
let left: Node?
let right: Node?
init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {
self.name = name
self.value = value
self.left = left
self.right = right
}
}
algorithm swift compression
1
I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday
oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday
1
@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
This question is a follow up question to: this post
I'd love some advice on my implementation and the API of the Huffman
class.
I'm also not sure how to test if my implementation is actually resulting in less bytes than a string. It seems that encoded.count
(as a Data) is larger than word.utf8.count
(as a String). Maybe I'm just not testing on large enough strings?
Also any thoughts on HuffData
storing the code
ex. ["00","01"]
and the frequencyTable
instead of the tree.
Here’s an example of how the API is used:
let word = "MISSISSIPPI_RIVER!"
let encoded = try? Huffman.encode(word)
let decoded = try? Huffman.decode(encoded!)
XCTAssertEqual(decoded, word)
Here's the code:
import Foundation
struct HuffData: Codable {
var code: [String]
var frequencyTable: [String: String]
}
class Huffman {
static func decode(_ data: Data) throws -> String {
do {
let huff = try JSONDecoder().decode(HuffData.self, from: data)
let reverseTable = Dictionary(uniqueKeysWithValues: zip(huff.frequencyTable.values, huff.frequencyTable.keys))
return huff.code.compactMap({ reverseTable[$0]}).joined()
}
catch let error {
throw error
}
}
static func encode(_ input: String) throws -> Data {
let frequencyTable = Huffman.buildFrequencyTable(for: input)
let code = input.compactMap({frequencyTable[String($0)]})
let huff = HuffData(code: code, frequencyTable: frequencyTable)
do {
let data = try JSONEncoder().encode(huff)
return data
}
catch let error {
throw error
}
}
static private func buildFrequencyTable(for input: String) -> [String: String] {
// count letter frequency
let sortedFrequency = input.reduce(into: [String: Int](), { freq, char in
freq[String(char), default: 0] += 1
})
// create queue of initial Nodes
let queue = sortedFrequency.map{ Node(name: $0.key, value: $0.value)}
// generate key by traversing tree
return Huffman.generateKey(for: Huffman.createTree(with: queue), prefix: "")
}
static private func generateKey(for node: Node, prefix: String) -> [String: String] {
var key = [String: String]()
if let left = node.left, let right = node.right {
key.merge(generateKey(for: left, prefix: prefix + "0"), uniquingKeysWith: {current,_ in current})
key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: {current,_ in current})
}else {
key[node.name] = prefix
}
return key
}
static private func createTree(with queue: [Node]) -> Node {
// initialize queue that sorts by decreasing count
var queue = PriorityQueue(queue: queue)
// until we have 1 root node, join subtrees of least frequency
while queue.count > 1 {
let node1 = queue.dequeue()
let node2 = queue.dequeue()
let rootNode = Huffman.createRoot(with: node1, and: node2)
queue.enqueue(node: rootNode)
}
return queue.queue[0]
}
static private func createRoot(with first: Node, and second: Node) -> Node {
return Node(name: "(first.name)(second.name)", value: first.value + second.value, left: first, right: second)
}
}
struct PriorityQueue {
var queue: [Node]
var count: Int {
return queue.count
}
mutating func enqueue(node: Node) {
queue.insert(node, at: queue.index(where: {$0.value <= node.value}) ?? 0)
}
mutating func dequeue() -> Node {
return queue.removeLast()
}
init(queue: [Node]){
// assumes queue will always be sorted by decreasing count
self.queue = queue.sorted(by: {$0.value > $1.value})
}
}
class Node: CustomStringConvertible {
var description: String {
return "(name): (value)"
}
let name: String
let value: Int
let left: Node?
let right: Node?
init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {
self.name = name
self.value = value
self.left = left
self.right = right
}
}
algorithm swift compression
This question is a follow up question to: this post
I'd love some advice on my implementation and the API of the Huffman
class.
I'm also not sure how to test if my implementation is actually resulting in less bytes than a string. It seems that encoded.count
(as a Data) is larger than word.utf8.count
(as a String). Maybe I'm just not testing on large enough strings?
Also any thoughts on HuffData
storing the code
ex. ["00","01"]
and the frequencyTable
instead of the tree.
Here’s an example of how the API is used:
let word = "MISSISSIPPI_RIVER!"
let encoded = try? Huffman.encode(word)
let decoded = try? Huffman.decode(encoded!)
XCTAssertEqual(decoded, word)
Here's the code:
import Foundation
struct HuffData: Codable {
var code: [String]
var frequencyTable: [String: String]
}
class Huffman {
static func decode(_ data: Data) throws -> String {
do {
let huff = try JSONDecoder().decode(HuffData.self, from: data)
let reverseTable = Dictionary(uniqueKeysWithValues: zip(huff.frequencyTable.values, huff.frequencyTable.keys))
return huff.code.compactMap({ reverseTable[$0]}).joined()
}
catch let error {
throw error
}
}
static func encode(_ input: String) throws -> Data {
let frequencyTable = Huffman.buildFrequencyTable(for: input)
let code = input.compactMap({frequencyTable[String($0)]})
let huff = HuffData(code: code, frequencyTable: frequencyTable)
do {
let data = try JSONEncoder().encode(huff)
return data
}
catch let error {
throw error
}
}
static private func buildFrequencyTable(for input: String) -> [String: String] {
// count letter frequency
let sortedFrequency = input.reduce(into: [String: Int](), { freq, char in
freq[String(char), default: 0] += 1
})
// create queue of initial Nodes
let queue = sortedFrequency.map{ Node(name: $0.key, value: $0.value)}
// generate key by traversing tree
return Huffman.generateKey(for: Huffman.createTree(with: queue), prefix: "")
}
static private func generateKey(for node: Node, prefix: String) -> [String: String] {
var key = [String: String]()
if let left = node.left, let right = node.right {
key.merge(generateKey(for: left, prefix: prefix + "0"), uniquingKeysWith: {current,_ in current})
key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: {current,_ in current})
}else {
key[node.name] = prefix
}
return key
}
static private func createTree(with queue: [Node]) -> Node {
// initialize queue that sorts by decreasing count
var queue = PriorityQueue(queue: queue)
// until we have 1 root node, join subtrees of least frequency
while queue.count > 1 {
let node1 = queue.dequeue()
let node2 = queue.dequeue()
let rootNode = Huffman.createRoot(with: node1, and: node2)
queue.enqueue(node: rootNode)
}
return queue.queue[0]
}
static private func createRoot(with first: Node, and second: Node) -> Node {
return Node(name: "(first.name)(second.name)", value: first.value + second.value, left: first, right: second)
}
}
struct PriorityQueue {
var queue: [Node]
var count: Int {
return queue.count
}
mutating func enqueue(node: Node) {
queue.insert(node, at: queue.index(where: {$0.value <= node.value}) ?? 0)
}
mutating func dequeue() -> Node {
return queue.removeLast()
}
init(queue: [Node]){
// assumes queue will always be sorted by decreasing count
self.queue = queue.sorted(by: {$0.value > $1.value})
}
}
class Node: CustomStringConvertible {
var description: String {
return "(name): (value)"
}
let name: String
let value: Int
let left: Node?
let right: Node?
init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {
self.name = name
self.value = value
self.left = left
self.right = right
}
}
algorithm swift compression
algorithm swift compression
asked yesterday
Turnipdabeets
1265
1265
1
I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday
oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday
1
@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday
add a comment |
1
I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday
oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday
1
@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday
1
1
I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday
I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday
oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday
oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday
1
1
@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday
@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday
add a comment |
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209167%2fhuffman-coding-api-in-swift%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
1
I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday
oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday
1
@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday