Untitled

                Never    
#!/usr/bin/env python
# coding: utf-8

# In[20]:


from queue import PriorityQueue
from dataclasses import dataclass, field
from typing import Any

@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: Any=field(compare=False)


# In[21]:


def frequences(phrase):
    lettres = set(phrase)
    return [ PrioritizedItem(priority=phrase.count(l), item=l) for l in lettres ]


# In[17]:


def foret_initiale(freq):
    q = PriorityQueue()
    for couple in freq:
        q.put(couple)
    return q


# In[39]:


def construit_arbre(foret):
    while True:
        min1 = foret.get()
        if foret.empty(): # assez inélegant mais qsize est approximatif donc c'est plus fiable
            return min1.item
        min2 = foret.get()
        node = PrioritizedItem(priority=min1.priority+min2.priority,
                               item=(min1.item, min1.priority+min2.priority, min2.item))
        foret.put(node)


# In[40]:


def codes_assoc(arbre, prefix=''):
    if type(arbre) == tuple:
        gauche, _, droite = arbre
        return codes_assoc(gauche, prefix=prefix+'0')+codes_assoc(droite, prefix=prefix+'1')
    return [ (arbre, prefix) ]


# In[41]:


def codes_dict(codes_assoc):
    return { l : c for (l,c) in codes_assoc }


# In[77]:


def encode(phrase, codes):
    return '|'.join([ codes[c] for c in phrase])


# In[78]:


def pp_arbre_l(arbre):
    if type(arbre) == tuple:
        gauche, p, droite = arbre
        
        sp = str(p)
        
        pp_g, w_g, r_g = pp_arbre_l(gauche)
        pp_d, w_d, r_d = pp_arbre_l(droite)
        
        l = [ 
            ' ' * r_g + '-' * (w_g-r_g) + ' ' + sp + ' ' + '-' * r_d + ' ' * (w_d-r_d),
            ' ' * r_g + '|' + ' ' * (w_g-r_g) + ' ' * len(sp) + ' ' * r_d + '|' + ' ' * (w_d-r_d)
        ]
        for i in range(max(len(pp_g), len(pp_d))):
            s = ''
            if i >= len(pp_g):
                s += ' ' * w_g
            else:
                s += pp_g[i]
            s += ' ' * (2 + len(sp))
            if i >= len(pp_d):
                s += ' ' * w_d
            else:
                s += pp_d[i]
            l.append(s)
        return l, w_g+w_d+len(sp)+2, w_g+1
        
    return [ "'"+arbre+"'" ], 2 + len(arbre), 1

def pp_arbre(arbre):
    return '\n'.join(pp_arbre_l(arbre)[0])


# In[79]:


phrase = "this is an example of a huffman tree"
phrase = "A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED"
freq = frequences(phrase)
foret = foret_initiale(freq)
arbre = construit_arbre(foret)
d = codes_dict(codes_assoc(arbre))
comp = encode(phrase, d)
print(pp_arbre(arbre))
d, comp, len(comp) - len(phrase) + 1

Raw Text