Wednesday, September 28, 2016

Data Structure and Algorithm - Trie

Referring to my C++ Trie implementation for data structure and algorithm of Trie. This post is only about the implementation of Trie in Python

The implementation is based on Python 2.7 and the development environment is based on Microsoft Visual Studio Community 2015.

1. Python Implementation
# ********************************************************************************
# Implementation
# ********************************************************************************
# DictionaryTrie.py

import sys

class DictionaryTrie(object):
    """DictionaryTrie class"""

    def __init__(self):
        self.value = None
        self.children = dict()
        for idx in range(0, 26):
            self.children[ord('a') + idx] = None

    def GetChildren(self):
        return self.children
    
    def GetValue(self):
        return self.value          
    
    # add a list of words into Trie
    def AddWords(self, words = None):
        if words is None:
            return
        try:
            for word in words:
                self.AddWord(word)
        except AttributeError as e:
            print ("AttributeError: {0}".format(e))
        except:
            print ("Unexpected Error: ", sys.exc_info()[0])

    # add a single word
    def AddWord(self, word = None):
        if word is None:
            return
        try:
            leafNode = self;
            for char in map(ord, word):
                if leafNode.children[char] is None:
                    leafNode.children[char] = DictionaryTrie()
                leafNode = leafNode.children[char]
            if leafNode.value is None:
                leafNode.value = word;
        except AttributeError as e:
            print ("AttributeError: {0}".format(e))
        except:
            print ("Unexpected error: ", sys.exc_info()[0])
    
    # find if a word exists in  Trie
    def FindWord(self, word):
        foundNode = self.FindWordAndGetNode(word)
        return foundNode is not None

    # find the word and return the node point to it
    # otherwise return None
    def FindWordAndGetNode(self, word):
        try:
            tempNode = self;
            for char in map(ord, word):
                if tempNode.children[char] is None:
                    return None
                tempNode = tempNode.children[char]
            if tempNode.value == word:
                return tempNode
            return None
        except AttributeError as e:
            print ("AttributeError: {0}".format(e))
        except:
            print ("Unexpected error: ", sys.exc_info()[0])

    # remove the word from Trie
    def RemoveWord(self, word):
        try:
            wordNode = self.FindWordAndGetNode(word)
            if wordNode is not None:
                wordNode.value = None
        except AttributeError as e:
            print ("AttributeError: {0}".format(e))
        except:
            print ("Unexpected error: ", sys.exc_info()[0])

    def __Traverse(self, root, result):
        try:
            for char in range(0, 26):
                tmpNode = root.children[ord('a') + char]
                if  tmpNode is not None:
                    if tmpNode.value is not None:
                        result.append(tmpNode.value)
                    self.__Traverse(tmpNode, result)
        except AttributeError as e:
            print ("AttributeError: {0}".format(e))
        except:
            print ("Unexpected error: ", sys.exc_info()[0])        
        
    # return the full list of words in Trie
    def Traverse(self):
        result = []
        self.__Traverse(self, result)
        if len(result) == 0:
            return None
        return result

    def __FindPrefix(self, prefix):
        try:
            tempNode = self;
            for char in map(ord, prefix):
                if tempNode.children[char] is None:
                    return None
                tempNode = tempNode.children[char]
            return tempNode
        except AttributeError as e:
            print ("AttributeError: {0}".format(e))
        except:
            print ("Unexpected error: ", sys.exc_info()[0])    

    # Return the list of words in Trie that starting with "prefix"
    def QueryPrefix(self, prefix):
        tempNode = self.__FindPrefix(prefix)
        
        if tempNode is not None:
            result = []
            self.__Traverse(tempNode, result)
            if len(result) > 0:
                return result
        return None


2. Unit Test
# ********************************************************************************
# Test
# ********************************************************************************
# Test_DictionaryTrie.py

import unittest

from DictionaryTrie import DictionaryTrie

class Test_DictionaryTrie(unittest.TestCase):
    def __init__(self, methodName = 'runTest'):
        super(Test_DictionaryTrie, self).__init__(methodName)
        self.m_Dict = DictionaryTrie()
        self.m_Dict.AddWord("apple")
        self.m_Dict.AddWord("orange")
        self.m_Dict.AddWord("pear")
        self.m_Dict.AddWords(("banana", "melon", "grape", "blueberry", "blue"))

    def test_Find(self):
        testDict = self.m_Dict
        self.assertTrue(testDict.FindWord("apple"))
        self.assertFalse(testDict.FindWord("sdfa"))
        
        pear = testDict.FindWordAndGetNode("pear")
        self.assertIsNotNone(pear)
        self.assertEqual(pear.GetValue(), "pear")
        for idx in range(0, 26):
            self.assertIsNone(pear.GetChildren()[ord('a')+idx])

        blue = testDict.FindWordAndGetNode("blue")
        self.assertIsNotNone(blue)
        self.assertEqual(blue.GetValue(), "blue")
        for idx in range(0, 26):
            if (ord('a') + idx) == ord('b'):
                self.assertIsNotNone(blue.GetChildren()[ord('a') + idx])
            else:
                self.assertIsNone(pear.GetChildren()[ord('a') + idx])

    def test_Remove(self):
        testDict = self.m_Dict
        testDict.RemoveWord("apple")
        self.assertFalse(testDict.FindWord("apple"))
        testDict.AddWord("apple")
        self.assertTrue(testDict.FindWord("apple"))

    def test_Traverse(self):
        testDict = self.m_Dict
        result = testDict.Traverse()
        self.assertEqual(result[0], "apple")
        self.assertEqual(result[1], "banana")
        self.assertEqual(result[2], "blue")
        self.assertEqual(result[3], "blueberry")
        self.assertEqual(result[4], "grape")
        self.assertEqual(result[5], "melon")
        self.assertEqual(result[6], "orange")
        self.assertEqual(result[7], "pear")
        self.assertEqual(len(result), 8)

    def test_QueryPrefix(self):
        testDict = self.m_Dict
        result = testDict.QueryPrefix("app")
        self.assertEqual(result[0], "apple")
        self.assertEqual(len(result), 1)
        result = testDict.QueryPrefix("adj")
        self.assertIsNone(result)
        result = testDict.QueryPrefix("blu")
        self.assertEqual(result[0], "blue")
        self.assertEqual(result[1], "blueberry")
                        
if __name__ == '__main__':
    unittest.main()


No comments:

Post a Comment