Skip to content

add two methods in tree.py mainly #169

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 46 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
45d6550
Update tree.py
Freakwill Sep 15, 2020
5024c90
Create random_tree.py
Freakwill Jun 21, 2021
545a66d
Update tree.py
Freakwill Jun 21, 2021
82d0614
Update tree.py
Freakwill Jun 21, 2021
c33380f
Update random_tree.py
Freakwill Jun 21, 2021
6f816bf
Update tree.py
Freakwill Apr 29, 2022
a91d105
Update tree.py
Freakwill Apr 29, 2022
5c7bbbd
Update random_tree.py
Freakwill Apr 29, 2022
bb22e6b
Merge branch 'caesar0301:master' into master
Freakwill Apr 29, 2022
ce6f8bd
Update tree.py
Freakwill Apr 29, 2022
f9b8f04
update
Freakwill Oct 28, 2022
2941d9f
Merge branch 'master' into master
Freakwill Apr 11, 2024
5fbe23b
Create cluster_tree
Freakwill Apr 11, 2024
de4b6ed
Update tree.py
Freakwill Apr 11, 2024
fe92144
Update cluster_tree
Freakwill Apr 11, 2024
d5a5f9b
Update random_tree.py
Freakwill Apr 11, 2024
ee1b1f6
Update random_tree.py
Freakwill Apr 11, 2024
1716b61
Update tree.py
Freakwill Apr 11, 2024
4fa9690
Update tree.py
Freakwill Apr 11, 2024
ad255c3
Update tree.py
Freakwill Apr 11, 2024
c8a9d5b
Update random_tree.py
Freakwill Apr 11, 2024
dbbb10c
Update random_tree.py
Freakwill Apr 11, 2024
574fcee
Update random_tree.py
Freakwill Apr 11, 2024
b53ac6f
Update random_tree.py
Freakwill Apr 11, 2024
004cce6
Update random_tree.py
Freakwill Apr 11, 2024
734f468
Update random_tree.py
Freakwill Apr 11, 2024
0b42017
Update tree.py
Freakwill Apr 11, 2024
9665b3a
Update tree.py
Freakwill Apr 11, 2024
4b27b71
Create huffman_tree.py
Freakwill Apr 11, 2024
3532fd1
Update huffman_tree.py
Freakwill Apr 11, 2024
31e4940
Update huffman_tree.py
Freakwill Apr 11, 2024
f4b60fe
Update huffman_tree.py
Freakwill Apr 11, 2024
3f5c683
Update huffman_tree.py
Freakwill Apr 11, 2024
ed65d07
Update huffman_tree.py
Freakwill Apr 11, 2024
89917f8
Update huffman_tree.py
Freakwill Apr 11, 2024
cb2829f
Update huffman_tree.py
Freakwill Apr 11, 2024
4a82ee6
Update huffman_tree.py
Freakwill Apr 11, 2024
baa4bb3
Update huffman_tree.py
Freakwill Apr 11, 2024
7395ba6
Update huffman_tree.py
Freakwill Apr 11, 2024
dc6a0ee
Update huffman_tree.py
Freakwill Apr 11, 2024
8e0743a
Update huffman_tree.py
Freakwill Apr 11, 2024
efa63b0
Update huffman_tree.py
Freakwill Apr 11, 2024
5787d5b
Update huffman_tree.py
Freakwill Apr 11, 2024
0580b0f
Update huffman_tree.py
Freakwill Apr 11, 2024
d6733b6
Update tree.py
Freakwill Apr 13, 2024
49f8b34
Update node.py
Freakwill Apr 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions examples/cluster_tree
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3

"""Clustering Model Based on Decision Tree
"""

import numpy as np
import numpy.linalg as LA
from scipy.stats import entropy
from scipy.spatial.distance import cdist

import pandas as pd

from treelib import Node, Tree

from sklearn.base import ClusterMixin, BaseEstimator
from sklearn.cluster import KMeans


class TreeCluster(BaseEstimator, ClusterMixin, Tree):
    """Hierarchical clustering model stored as a tree.

    The data are split recursively with 2-means. Every node of the tree keeps
    the mean of the samples that reached it in ``node.data['mean']``; the
    leaves of the fitted tree are the final clusters.

    Parameters:
        epsilon: threshold that the gain of a 2-means split (relative
            reduction of the within-node scatter) must exceed for the node
            to be split
        features: stored as ``features_``; not used by this class itself
        classes: initial value of ``classes_``; overwritten by `fit`
        *args, **kwargs: forwarded to the parent constructors

    Attributes set by `fit` (on the top-level tree only):
        cluster_centers_: list with the mean of every leaf node
        classes_: ``np.arange(len(cluster_centers_))``
    """

    def __init__(self, epsilon=0.6, features=None, classes=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.epsilon = epsilon
        self.features_ = features
        self.classes_ = classes

    def fit(self, X, Y=None, mean=None, level=()):
        """Build the cluster tree by recursive 2-means splitting.

        A node for the current data is added first. If there are more than
        two samples, the data are split into two clusters with KMeans and the
        split is kept only when its gain ``1 - inertia / total_scatter``
        exceeds ``epsilon``; each kept cluster is fitted recursively and
        pasted below the current node.

        Arguments:
            X {2D array|list|dataframe} -- input vars
            Y -- ignored
            mean -- centre stored on the node created for `X`; computed as
                ``X.mean(axis=0)`` when omitted (the recursion passes the
                KMeans centre of the cluster)
            level {tuple} -- identifier of the node created for `X`; ``()``
                denotes the top-level call

        Returns:
            TreeCluster
        """

        if mean is None:
            mean = X.mean(axis=0)
        self.add_node(Node(tag='-'.join(map(str, level)), identifier=level, data={'mean': mean}))

        if len(X) > 2:
            # Total scatter of the node around its own centre. When it is
            # zero (all samples identical) there is nothing to split, and the
            # gain below would be 0/0.
            scatter = LA.norm(X - mean, 'fro') ** 2
            if scatter > 0:
                kmeans = KMeans(n_clusters=2)
                kmeans.fit(X)
                y = kmeans.predict(X)

                gain = 1 - kmeans.inertia_ / scatter

                if gain > self.epsilon:
                    # Index the centres by label rather than zipping them
                    # with the observed labels: if a label never occurs, zip
                    # would pair the remaining label with the wrong centre.
                    for k in np.unique(y):
                        t = TreeCluster(epsilon=self.epsilon)
                        t.fit(X[y == k], mean=kmeans.cluster_centers_[k], level=level + (k,))
                        self.paste(level, t)

        if level == ():
            # get cluster centers from the data of the nodes
            self.cluster_centers_ = [node.data['mean'] for node in self.all_nodes_itr() if node.is_leaf()]
            self.classes_ = np.arange(len(self.cluster_centers_))

        return self

    def predict_proba(self, X):
        """Return soft cluster memberships for every sample.

        The weight of cluster ``j`` for sample ``i`` is ``exp(-d_ij)`` with
        ``d_ij`` the distance computed by ``cdist`` between the sample and
        the cluster centre; weights are normalised per sample, so every row
        of the result sums to 1.
        """
        weights = np.exp(-cdist(X, self.cluster_centers_))
        # Normalise across clusters (axis 1), not across samples.
        return weights / weights.sum(axis=1, keepdims=True)

    def predict(self, X):
        """Return, for every sample, the class with the largest membership."""
        p = self.predict_proba(X)
        return self.classes_[np.argmax(p, axis=1)]


if __name__ == '__main__':

    # Demo: cluster the iris measurements and show the resulting tree.
    from sklearn import datasets

    iris = datasets.load_iris()
    X_train = iris.data
    y_train = iris.target  # labels are loaded but not used by the clustering

    tc = TreeCluster(epsilon=0.5)
    tc.fit(X_train)
    y_ = tc.predict(X_train)

    for shown in (tc, tc.cluster_centers_):
        print(shown)
124 changes: 124 additions & 0 deletions examples/huffman_tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python


"""Huffman coding
"""

from toolz import concat
from treelib import Tree, Node

import numpy as np


def _get_symbols(tree):
    """Return the `symbols` stored on a node, or on the root node of a tree.

    A single symbol given as a string is wrapped in a one-element list;
    any other value is returned unchanged.

    tree: Tree or Node
    """
    node = tree if isinstance(tree, Node) else tree.get_node(tree.root)
    symbols = node.data["symbols"]
    return [symbols] if isinstance(symbols, str) else symbols


def _get_frequency(tree):
    """Return the `frequency` stored on a node, or on the root node of a tree.

    The value is returned as stored. Frequencies are summed and sorted by the
    callers, so they must not be wrapped in a list the way symbols are.

    tree: Tree or Node
    """
    node = tree if isinstance(tree, Node) else tree.get_node(tree.root)
    return node.data["frequency"]


def merge(trees, level=""):
    """Merge several trees/nodes into one tree by hanging them under a new root.

    Every input is prefixed with its position ``k`` in `trees`: the index is
    prepended to the identifier and to the ``code`` of each of its nodes, and
    the tag is rebuilt as ``<code>: {<symbols>}/<frequency>``.

    NOTE: the inputs are modified in place (identifiers, codes, tags and, for
    trees, the node table and the parent/child pointers).

    Args:
        trees (list): list of trees or nodes
        level (str, optional): identifier of the new root node; identifiers
            are built by string concatenation, so this is a string

    Returns:
        Tree
    """

    # The new root carries all symbols of its inputs and their total frequency.
    data = list(concat(map(_get_symbols, trees)))
    freq = sum(map(_get_frequency, trees))
    t = Tree()
    root = Node(identifier=level, data={"symbols": data, "frequency": freq, "code": ""})
    t.add_node(root)
    t.root = level
    root.tag = f"root: {{{','.join(root.data['symbols'])}}}/{root.data['frequency']}"
    for k, tree in enumerate(trees):
        if isinstance(tree, Node):
            # A bare node becomes a direct child of the new root.
            tree.identifier = f"{k}" + tree.identifier
            tree.data["code"] = f"{k}" + tree.data["code"]
            tree.tag = f"{tree.data['code']}: {{{','.join(tree.data['symbols'])}}}/{tree.data['frequency']}"
            t.add_node(tree, parent=level)
        else:
            # Prefix identifier and code of every node of the subtree.
            for n in tree.all_nodes_itr():
                n.identifier = f"{k}" + n.identifier
                n.data["code"] = f"{k}" + n.data["code"]
                n.tag = f"{n.data['code']}: {{{','.join(n.data['symbols'])}}}/{n.data['frequency']}"

            # Re-key the node table by the new identifiers. The `k` inside the
            # comprehension is the old key and is local to the comprehension.
            tree._nodes = {n.identifier: n for k, n in tree._nodes.items()}
            tree.root = f"{k}{tree.root}"
            tid = tree.identifier
            # The parent/child pointers still hold the old identifiers, so
            # they get the same prefix.
            for n in tree.all_nodes_itr():
                if n.is_root():
                    n.set_successors([f"{k}{nid}" for nid in n._successors[tid]], tid)
                elif n.is_leaf():
                    n.set_predecessor(f"{k}{n._predecessor[tid]}", tid)
                else:
                    n.set_predecessor(f"{k}{n._predecessor[tid]}", tid)
                    n.set_successors([f"{k}{nid}" for nid in n._successors[tid]], tid)

            # Attach the renamed subtree below the new root.
            t.paste(level, tree, deep=True)
    return t


def huffman_tree(trees, level="", n_branches=2):
    """Huffman coding

    Repeatedly merges the `n_branches` inputs with the smallest frequency
    into one tree (placed first in the working list) until at most
    `n_branches` inputs remain, then merges those into the final tree.

    Args:
        trees (list): list of trees or nodes (at least two)
        level (str, optional): identifier of the root of every merged tree
        n_branches (int, optional): maximal number of children per merge,
            at least 2; set n_branches=2 by default

    Returns:
        Tree: Huffman tree

    Raises:
        ValueError: if `n_branches` is smaller than 2 (a merge of a single
            input would never shrink the list)
    """
    assert len(trees) >= 2
    if n_branches < 2:
        raise ValueError("n_branches must be at least 2")

    # Iterate instead of recursing so that `n_branches` applies to every
    # merge step and long symbol lists do not hit the recursion limit.
    while len(trees) > n_branches:
        ks = np.argsort([_get_frequency(tree) for tree in trees])[:n_branches].tolist()
        t = merge([trees[k] for k in ks], level=level)
        merged = set(ks)
        trees = [t, *(tree for k, tree in enumerate(trees) if k not in merged)]
    return merge(trees, level=level)


def make_node(s, f):
    """Create a `Node` with an empty identifier and an empty code.

    s: str, stored under the key "symbols"
    f: number, stored under the key "frequency"
    """
    payload = {"symbols": s, "frequency": f, "code": ""}
    return Node(identifier="", data=payload)


# Demo: build and print the Huffman tree of five symbols with their frequencies.
d = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
nodes = list(make_node(s, f) for s, f in d.items())
t = huffman_tree(nodes)
print(t)
47 changes: 47 additions & 0 deletions examples/random_tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python3

"""
Generate a tree randomly; Test the `apply` method;
"""

import random
from treelib import Tree


def _random(max_depth=5, min_width=1, max_width=2, offset=()):
    """Generate a tree randomly.

    Node identifiers are tuples: the root is `offset` and the i-th child of
    a node is that node's identifier extended by ``(i,)``.

    max_depth: maximal number of levels below the root; values <= 0 give a
        tree that consists of the root only
    min_width, max_width: inclusive bounds for the number of children drawn
        for every inner node
    offset: identifier of the root node
    """
    tree = Tree()
    tree.create_node(identifier=offset)
    if max_depth <= 0:
        return tree

    nb = random.randint(min_width, max_width)
    for i in range(nb):
        child_id = offset + (i,)
        if max_depth == 1:
            # Last level: children are plain leaves.
            tree.create_node(identifier=child_id, parent=offset)
        else:
            # Every sibling gets the same remaining depth and width bounds.
            subtree = _random(
                max_depth=max_depth - 1,
                min_width=min_width,
                max_width=max_width,
                offset=child_id,
            )
            tree.paste(offset, subtree)
    return tree


def _map(func, tree):
# tree as a functor
tree = tree._clone(with_tree=True)
print(tree)
for a in tree.all_nodes_itr():
key(a)
return tree


def key(node):
    """Set the tag of `node` to the parts of its identifier joined by "-"."""
    parts = [str(part) for part in node.identifier]
    node.tag = "-".join(parts)


# Demo 1: retag a random tree with the local helper `_map`.
print(_map(key, _random()))

# Demo 2: the same retagging of another random tree through `Tree.apply`.
print(_random().apply(key))
2 changes: 1 addition & 1 deletion treelib/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def tag(self):
@tag.setter
def tag(self, value):
    """Set the value of `_tag`."""
    # `value if value is not None else None` is always `value`, so a single
    # plain assignment is enough.
    self._tag = value

def __repr__(self):
name = self.__class__.__name__
Expand Down
52 changes: 44 additions & 8 deletions treelib/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,12 @@ def all_nodes_itr(self):
"""
return self._nodes.values()

def iternodes(self):
    """
    Return a view of all node objects stored in the tree.

    Same result as `all_nodes_itr`, under a name that follows the usual
    Python naming convention.
    """
    nodes = self._nodes
    return nodes.values()

def ancestor(self, nid, level=None):
"""
For a given id, get ancestor node object at a given level.
Expand Down Expand Up @@ -377,7 +383,7 @@ def children(self, nid):

def contains(self, nid):
    """Check if the tree contains node of given id"""
    # The membership test already yields a bool; no ternary is needed.
    return nid in self._nodes

def create_node(self, tag=None, identifier=None, parent=None, data=None):
"""
Expand Down Expand Up @@ -515,6 +521,9 @@ def get_node(self, nid):
return None
return self._nodes[nid]

def get_root(self):
    """
    Return the node object of the root, i.e. ``get_node(self.root)``.
    """
    root_id = self.root
    return self.get_node(root_id)

def is_branch(self, nid):
"""
Return the children (ID) list of nid.
Expand Down Expand Up @@ -689,10 +698,12 @@ def paste(self, nid, new_tree, deep=False):
if set_joint:
raise ValueError("Duplicated nodes %s exists." % list(map(text, set_joint)))

for cid, node in iteritems(new_tree.nodes):
if deep:
node = deepcopy(new_tree[node])
self._nodes.update({cid: node})
if deep:
new_nodes = {cid: deepcopy(node) for cid, node in iteritems(new_tree.nodes)}
else:
new_nodes = new_tree.nodes
self._nodes.update(new_nodes)
for _, node in iteritems(new_nodes):
node.clone_pointers(new_tree.identifier, self._identifier)

self.__update_bpointer(new_tree.root, nid)
Expand Down Expand Up @@ -1003,9 +1014,8 @@ def subtree(self, nid, identifier=None):
# define nodes parent/children in this tree
# all pointers are the same as copied tree, except the root
st[node_n].clone_pointers(self._identifier, st.identifier)
if node_n == nid:
# reset root parent for the new tree
st[node_n].set_predecessor(None, st.identifier)
# reset root parent for the new tree
st[nid].set_predecessor(None, st.identifier)
return st

def update_node(self, nid, **attrs):
Expand Down Expand Up @@ -1129,6 +1139,32 @@ def to_graphviz(

f.close()

def apply(self, key, deep=True):
    """Morphism of tree
    Work like the built-in `map`: clone the tree, call `key` on every node
    of the clone and return the clone.

    Arguments
        key -- impure function of a node (called for its side effect)
        deep -- passed to `_clone`; please keep it true
    """
    result = self._clone(with_tree=True, deep=deep)
    for node in result.all_nodes():
        key(node)
    return result

def apply_data(self, key, deep=True):
    """morphism of tree, but acts on data of nodes.
    It calls the method `apply` with a wrapper that replaces the `data`
    of every node by ``key(node.data)``.

    Arguments
        key -- pure function of node.data
        deep -- forwarded to `apply`
    """

    def _replace_data(node):
        node.data = key(node.data)

    return self.apply(_replace_data, deep=deep)

@classmethod
def from_map(cls, child_parent_dict, id_func=None, data_func=None):
"""
Expand Down