Huffman Coding API in Swift

up vote
0
down vote

favorite

This question is a follow up question to: this post

I'd love some advice on my implementation and the API of the Huffman class.

I'm also not sure how to test if my implementation is actually resulting in less bytes than a string. It seems that encoded.count (as a Data) is larger than word.utf8.count (as a String). Maybe I'm just not testing on large enough strings?

Also any thoughts on HuffData storing the code ex. ["00","01"] and the frequencyTable instead of the tree.

Here’s an example of how the API is used:

let word = "MISSISSIPPI_RIVER!"

let encoded = try? Huffman.encode(word)

let decoded = try? Huffman.decode(encoded!)

XCTAssertEqual(decoded, word)

Here's the code:

import Foundation



struct HuffData: Codable {

    var code: [String]

    var frequencyTable: [String: String]

}



class Huffman {

    static func decode(_ data: Data) throws -> String {

        do {

            let huff = try JSONDecoder().decode(HuffData.self, from: data)

            let reverseTable = Dictionary(uniqueKeysWithValues: zip(huff.frequencyTable.values, huff.frequencyTable.keys))

            return huff.code.compactMap({ reverseTable[$0]}).joined()

        }

        catch let error {

            throw error

        }

    }



    static func encode(_ input: String) throws -> Data {

        let frequencyTable = Huffman.buildFrequencyTable(for: input)

        let code = input.compactMap({frequencyTable[String($0)]})

        let huff = HuffData(code: code, frequencyTable: frequencyTable)

        do {

            let data = try JSONEncoder().encode(huff)

            return data

        }

        catch let error {

            throw error

        }

    }



    static private func buildFrequencyTable(for input: String) -> [String: String] {

        // count letter frequency

        let sortedFrequency = input.reduce(into: [String: Int](), { freq, char in

            freq[String(char), default: 0] += 1

        })

        // create queue of initial Nodes

        let queue = sortedFrequency.map{ Node(name: $0.key, value: $0.value)}

        // generate key by traversing tree

        return Huffman.generateKey(for: Huffman.createTree(with: queue), prefix: "")

    }



    static private func generateKey(for node: Node, prefix: String) -> [String: String] {

        var key = [String: String]()

        if let left = node.left, let right = node.right {

            key.merge(generateKey(for: left, prefix: prefix + "0"), uniquingKeysWith: {current,_ in current})

            key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: {current,_ in current})

        }else {

            key[node.name] = prefix

        }

        return key

    }



    static private func createTree(with queue: [Node]) -> Node {

        // initialize queue that sorts by decreasing count

        var queue = PriorityQueue(queue: queue)

        // until we have 1 root node, join subtrees of least frequency

        while queue.count > 1 {

            let node1 = queue.dequeue()

            let node2 = queue.dequeue()

            let rootNode = Huffman.createRoot(with: node1, and: node2)

            queue.enqueue(node: rootNode)

        }

        return queue.queue[0]

    }



    static private func createRoot(with first: Node, and second: Node) -> Node {

        return Node(name: "(first.name)(second.name)", value: first.value + second.value, left: first, right: second)

    }



}



struct PriorityQueue {

    var queue: [Node]

    var count: Int {

        return queue.count

    }

    mutating func enqueue(node: Node) {

        queue.insert(node, at: queue.index(where: {$0.value <= node.value}) ?? 0)

    }

    mutating func dequeue() -> Node {

        return queue.removeLast()

    }

    init(queue: [Node]){

        // assumes queue will always be sorted by decreasing count

        self.queue = queue.sorted(by: {$0.value > $1.value})

    }

}



class Node: CustomStringConvertible {

    var description: String {

        return "(name): (value)"

    }

    let name: String

    let value: Int

    let left: Node?

    let right: Node?



    init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {

        self.name = name

        self.value = value

        self.left = left

        self.right = right

    }

}

asked yesterday

Turnipdabeets

1265

1

I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday

oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday

1

@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday

add a comment |

up vote
0
down vote

favorite

This question is a follow up question to: this post

I'd love some advice on my implementation and the API of the Huffman class.

Also any thoughts on HuffData storing the code ex. ["00","01"] and the frequencyTable instead of the tree.

Here’s an example of how the API is used:

let word = "MISSISSIPPI_RIVER!"

let encoded = try? Huffman.encode(word)

let decoded = try? Huffman.decode(encoded!)

XCTAssertEqual(decoded, word)

Here's the code:

import Foundation



struct HuffData: Codable {

    var code: [String]

    var frequencyTable: [String: String]

}



class Huffman {

    static func decode(_ data: Data) throws -> String {

        do {

            let huff = try JSONDecoder().decode(HuffData.self, from: data)

            let reverseTable = Dictionary(uniqueKeysWithValues: zip(huff.frequencyTable.values, huff.frequencyTable.keys))

            return huff.code.compactMap({ reverseTable[$0]}).joined()

        }

        catch let error {

            throw error

        }

    }



    static func encode(_ input: String) throws -> Data {

        let frequencyTable = Huffman.buildFrequencyTable(for: input)

        let code = input.compactMap({frequencyTable[String($0)]})

        let huff = HuffData(code: code, frequencyTable: frequencyTable)

        do {

            let data = try JSONEncoder().encode(huff)

            return data

        }

        catch let error {

            throw error

        }

    }



    static private func buildFrequencyTable(for input: String) -> [String: String] {

        // count letter frequency

        let sortedFrequency = input.reduce(into: [String: Int](), { freq, char in

            freq[String(char), default: 0] += 1

        })

        // create queue of initial Nodes

        let queue = sortedFrequency.map{ Node(name: $0.key, value: $0.value)}

        // generate key by traversing tree

        return Huffman.generateKey(for: Huffman.createTree(with: queue), prefix: "")

    }



    static private func generateKey(for node: Node, prefix: String) -> [String: String] {

        var key = [String: String]()

        if let left = node.left, let right = node.right {

            key.merge(generateKey(for: left, prefix: prefix + "0"), uniquingKeysWith: {current,_ in current})

            key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: {current,_ in current})

        }else {

            key[node.name] = prefix

        }

        return key

    }



    static private func createTree(with queue: [Node]) -> Node {

        // initialize queue that sorts by decreasing count

        var queue = PriorityQueue(queue: queue)

        // until we have 1 root node, join subtrees of least frequency

        while queue.count > 1 {

            let node1 = queue.dequeue()

            let node2 = queue.dequeue()

            let rootNode = Huffman.createRoot(with: node1, and: node2)

            queue.enqueue(node: rootNode)

        }

        return queue.queue[0]

    }



    static private func createRoot(with first: Node, and second: Node) -> Node {

        return Node(name: "(first.name)(second.name)", value: first.value + second.value, left: first, right: second)

    }



}



struct PriorityQueue {

    var queue: [Node]

    var count: Int {

        return queue.count

    }

    mutating func enqueue(node: Node) {

        queue.insert(node, at: queue.index(where: {$0.value <= node.value}) ?? 0)

    }

    mutating func dequeue() -> Node {

        return queue.removeLast()

    }

    init(queue: [Node]){

        // assumes queue will always be sorted by decreasing count

        self.queue = queue.sorted(by: {$0.value > $1.value})

    }

}



class Node: CustomStringConvertible {

    var description: String {

        return "(name): (value)"

    }

    let name: String

    let value: Int

    let left: Node?

    let right: Node?



    init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {

        self.name = name

        self.value = value

        self.left = left

        self.right = right

    }

}

asked yesterday

Turnipdabeets

1265

1

I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday

oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday

1

@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday

add a comment |

up vote
0
down vote

favorite

This question is a follow up question to: this post

I'd love some advice on my implementation and the API of the Huffman class.

Also any thoughts on HuffData storing the code ex. ["00","01"] and the frequencyTable instead of the tree.

Here’s an example of how the API is used:

let word = "MISSISSIPPI_RIVER!"

let encoded = try? Huffman.encode(word)

let decoded = try? Huffman.decode(encoded!)

XCTAssertEqual(decoded, word)

Here's the code:

import Foundation



struct HuffData: Codable {

    var code: [String]

    var frequencyTable: [String: String]

}



class Huffman {

    static func decode(_ data: Data) throws -> String {

        do {

            let huff = try JSONDecoder().decode(HuffData.self, from: data)

            let reverseTable = Dictionary(uniqueKeysWithValues: zip(huff.frequencyTable.values, huff.frequencyTable.keys))

            return huff.code.compactMap({ reverseTable[$0]}).joined()

        }

        catch let error {

            throw error

        }

    }



    static func encode(_ input: String) throws -> Data {

        let frequencyTable = Huffman.buildFrequencyTable(for: input)

        let code = input.compactMap({frequencyTable[String($0)]})

        let huff = HuffData(code: code, frequencyTable: frequencyTable)

        do {

            let data = try JSONEncoder().encode(huff)

            return data

        }

        catch let error {

            throw error

        }

    }



    static private func buildFrequencyTable(for input: String) -> [String: String] {

        // count letter frequency

        let sortedFrequency = input.reduce(into: [String: Int](), { freq, char in

            freq[String(char), default: 0] += 1

        })

        // create queue of initial Nodes

        let queue = sortedFrequency.map{ Node(name: $0.key, value: $0.value)}

        // generate key by traversing tree

        return Huffman.generateKey(for: Huffman.createTree(with: queue), prefix: "")

    }



    static private func generateKey(for node: Node, prefix: String) -> [String: String] {

        var key = [String: String]()

        if let left = node.left, let right = node.right {

            key.merge(generateKey(for: left, prefix: prefix + "0"), uniquingKeysWith: {current,_ in current})

            key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: {current,_ in current})

        }else {

            key[node.name] = prefix

        }

        return key

    }



    static private func createTree(with queue: [Node]) -> Node {

        // initialize queue that sorts by decreasing count

        var queue = PriorityQueue(queue: queue)

        // until we have 1 root node, join subtrees of least frequency

        while queue.count > 1 {

            let node1 = queue.dequeue()

            let node2 = queue.dequeue()

            let rootNode = Huffman.createRoot(with: node1, and: node2)

            queue.enqueue(node: rootNode)

        }

        return queue.queue[0]

    }



    static private func createRoot(with first: Node, and second: Node) -> Node {

        return Node(name: "(first.name)(second.name)", value: first.value + second.value, left: first, right: second)

    }



}



struct PriorityQueue {

    var queue: [Node]

    var count: Int {

        return queue.count

    }

    mutating func enqueue(node: Node) {

        queue.insert(node, at: queue.index(where: {$0.value <= node.value}) ?? 0)

    }

    mutating func dequeue() -> Node {

        return queue.removeLast()

    }

    init(queue: [Node]){

        // assumes queue will always be sorted by decreasing count

        self.queue = queue.sorted(by: {$0.value > $1.value})

    }

}



class Node: CustomStringConvertible {

    var description: String {

        return "(name): (value)"

    }

    let name: String

    let value: Int

    let left: Node?

    let right: Node?



    init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {

        self.name = name

        self.value = value

        self.left = left

        self.right = right

    }

}

asked yesterday

Turnipdabeets

1265

This question is a follow up question to: this post

I'd love some advice on my implementation and the API of the Huffman class.

Also any thoughts on HuffData storing the code ex. ["00","01"] and the frequencyTable instead of the tree.

Here’s an example of how the API is used:

let word = "MISSISSIPPI_RIVER!"

let encoded = try? Huffman.encode(word)

let decoded = try? Huffman.decode(encoded!)

XCTAssertEqual(decoded, word)

Here's the code:

import Foundation



struct HuffData: Codable {

    var code: [String]

    var frequencyTable: [String: String]

}



class Huffman {

    static func decode(_ data: Data) throws -> String {

        do {

            let huff = try JSONDecoder().decode(HuffData.self, from: data)

            let reverseTable = Dictionary(uniqueKeysWithValues: zip(huff.frequencyTable.values, huff.frequencyTable.keys))

            return huff.code.compactMap({ reverseTable[$0]}).joined()

        }

        catch let error {

            throw error

        }

    }



    static func encode(_ input: String) throws -> Data {

        let frequencyTable = Huffman.buildFrequencyTable(for: input)

        let code = input.compactMap({frequencyTable[String($0)]})

        let huff = HuffData(code: code, frequencyTable: frequencyTable)

        do {

            let data = try JSONEncoder().encode(huff)

            return data

        }

        catch let error {

            throw error

        }

    }



    static private func buildFrequencyTable(for input: String) -> [String: String] {

        // count letter frequency

        let sortedFrequency = input.reduce(into: [String: Int](), { freq, char in

            freq[String(char), default: 0] += 1

        })

        // create queue of initial Nodes

        let queue = sortedFrequency.map{ Node(name: $0.key, value: $0.value)}

        // generate key by traversing tree

        return Huffman.generateKey(for: Huffman.createTree(with: queue), prefix: "")

    }



    static private func generateKey(for node: Node, prefix: String) -> [String: String] {

        var key = [String: String]()

        if let left = node.left, let right = node.right {

            key.merge(generateKey(for: left, prefix: prefix + "0"), uniquingKeysWith: {current,_ in current})

            key.merge(generateKey(for: right, prefix: prefix + "1"), uniquingKeysWith: {current,_ in current})

        }else {

            key[node.name] = prefix

        }

        return key

    }



    static private func createTree(with queue: [Node]) -> Node {

        // initialize queue that sorts by decreasing count

        var queue = PriorityQueue(queue: queue)

        // until we have 1 root node, join subtrees of least frequency

        while queue.count > 1 {

            let node1 = queue.dequeue()

            let node2 = queue.dequeue()

            let rootNode = Huffman.createRoot(with: node1, and: node2)

            queue.enqueue(node: rootNode)

        }

        return queue.queue[0]

    }



    static private func createRoot(with first: Node, and second: Node) -> Node {

        return Node(name: "(first.name)(second.name)", value: first.value + second.value, left: first, right: second)

    }



}



struct PriorityQueue {

    var queue: [Node]

    var count: Int {

        return queue.count

    }

    mutating func enqueue(node: Node) {

        queue.insert(node, at: queue.index(where: {$0.value <= node.value}) ?? 0)

    }

    mutating func dequeue() -> Node {

        return queue.removeLast()

    }

    init(queue: [Node]){

        // assumes queue will always be sorted by decreasing count

        self.queue = queue.sorted(by: {$0.value > $1.value})

    }

}



class Node: CustomStringConvertible {

    var description: String {

        return "(name): (value)"

    }

    let name: String

    let value: Int

    let left: Node?

    let right: Node?



    init(name: String, value: Int, left: Node? = nil, right: Node? = nil) {

        self.name = name

        self.value = value

        self.left = left

        self.right = right

    }

}

algorithm swift compression

asked yesterday

Turnipdabeets

1265

asked yesterday

Turnipdabeets

1265

asked yesterday

Turnipdabeets

1265

asked yesterday

Turnipdabeets

1265

asked yesterday

Turnipdabeets

1265

1

I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday

oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday

1

@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday

add a comment |

1

I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday

oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday

1

@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday

I don't know Swift well enough to be sure, but it seems like your output is 8 times as large as it should be, by neglecting to pack the bits into bytes. That way, since a Huffman code is at least 1 bit, the result is guaranteed to be at least as large as the input.
– harold
yesterday

oh no! I'm pretty sure I'm getting the correct zeros and ones from the tree so I'm not sure how the output is 8 times as large as it should be! I assumed joining the table with the code would add some overhead but would be worth it for strings that are large enough to consider encoding.
– Turnipdabeets
yesterday

@harold is quite right: Each zero and one of the encoded sequence is stored as a "0" or "1" character and thus consumes 8 bits.
– Martin R
yesterday

add a comment |

active

oldest

votes

Your Answer

StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209167%2fhuffman-coding-api-in-swift%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

active

oldest

votes

draft saved

draft discarded

Thanks for contributing an answer to Code Review Stack Exchange!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

Use MathJax to format equations. MathJax reference.

To learn more, see our tips on writing great answers.

Some of your past answers have not been well-received, and you're in danger of being blocked from answering.

Please pay close attention to the following guidance:

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference, If you need detailed information, please check here

hR,FmP kNN FbyWDVG9Lzo1dgJdWDVEGzd5cyxjBTALPYbCR2QOfvambExRvaV7FYezM QjjUBxRtmvf,ADDgrOy TsHgSYJ RdSoBoCL

搜尋此網誌

Krdytkyu