Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mondrian tree #2

Open
ryan102590 opened this issue Mar 1, 2019 · 3 comments
Open

mondrian tree #2

ryan102590 opened this issue Mar 1, 2019 · 3 comments

Comments

@ryan102590
Copy link

Hi Yubin,

Very interesting work! I was wondering if you had any ideas on implementing a mondrian tree: I have a script from nel215 that looks to be in its most basic form using just numpy. Do you think there is a way we could implement it in the find_split/is_leaf form that you present?

import numpy as np

class MondrianForestRegressor(object):
    """Ensemble of Mondrian trees whose predictions are averaged.

    NOTE(review): the pasted code read `def init` — GitHub Markdown stripped
    the double underscores from `__init__`; restored here, along with the
    indentation the paste lost.
    """

    def __init__(self, n_tree):
        # One independently grown Mondrian tree per ensemble member.
        self.n_tree = n_tree
        self.trees = []
        for _ in range(self.n_tree):
            self.trees.append(MondrianTreeRegressor())

    def fit(self, X, y):
        """Fit every tree on the batch (each tree resets itself in fit)."""
        for tree in self.trees:
            tree.fit(X, y)

    def partial_fit(self, X, y):
        """Incrementally extend every tree with the new samples."""
        for tree in self.trees:
            tree.partial_fit(X, y)

    def get_params(self, deep):
        # scikit-learn style parameter accessor.
        return {'n_tree': self.n_tree}

    def predict(self, X):
        """Return the element-wise mean of the per-tree predictions."""
        res = np.array([tree.predict(X) for tree in self.trees])
        return res.mean(axis=0)

class Node(object):
    """One node of a Mondrian tree.

    min_list/max_list bound the axis-aligned box of the data seen at this
    node; tau is the node's split time; delta/xi are the split dimension
    and split location (internal nodes only; None for leaves).

    NOTE(review): the pasted `def init` was a Markdown-garbled `__init__`
    (the default object constructor would otherwise run and accept no
    arguments); restored here together with the lost indentation.
    """

    def __init__(self, min_list, max_list, tau, is_leaf, stat, parent=None, delta=None, xi=None):
        self.parent = parent
        self.tau = tau
        self.is_leaf = is_leaf
        self.min_list = min_list
        self.max_list = max_list
        self.delta = delta  # split dimension, set only for internal nodes
        self.xi = xi        # split threshold, set only for internal nodes
        self.left = None
        self.right = None
        self.stat = stat    # per-node sufficient statistics (e.g. Regressor)

    def update_leaf(self, x, label):
        # Fold one new observation into this leaf's statistics.
        self.stat.add(x, label)

    def update_internal(self):
        # An internal node's statistics are the merge of both children's.
        self.stat = self.left.stat.merge(self.right.stat)

    def get_parent_tau(self):
        # The root's parent is conventionally born at time 0.
        if self.parent is None:
            return 0.0
        return self.parent.tau

    def __repr__(self):
        return "<mondrianforest.Node tau={} min_list={} max_list={} is_leaf={}>".format(
            self.tau,
            self.min_list,
            self.max_list,
            self.is_leaf,
        )

class RegressorFactory(object):
    """Factory handing each tree node its own fresh Regressor statistic."""

    def create(self):
        # New, empty running-mean accumulator per node.
        return Regressor()

class MondrianTree(object):
    """Base class for an online Mondrian tree.

    Subclasses must set `self.stat_factory` (e.g. RegressorFactory) in
    their constructor before fitting. NOTE(review): the pasted `def init`
    was a Markdown-garbled `__init__`; restored here along with the
    indentation the paste lost.
    """

    def __init__(self):
        self.root = None
        self.classes = set()  # labels seen so far (used by classifier subclasses)

    def create_leaf(self, x, label, parent):
        """Create a leaf whose bounding box is the single point x, seeded with (x, label)."""
        leaf = Node(
            min_list=x.copy(),
            max_list=x.copy(),
            is_leaf=True,
            stat=self.stat_factory.create(),
            tau=1e9,  # effectively infinite split time until the leaf is split
            parent=parent,
        )
        leaf.update_leaf(x, label)
        return leaf

    def extend_mondrian_block(self, node, x, label):
        """Extend the Mondrian block rooted at `node` with sample (x, label).

        Returns the root of the (possibly re-parented) sub-tree.
        """
        # Per-dimension amount by which x lies outside node's bounding box.
        e_min = np.maximum(node.min_list - x, 0)
        e_max = np.maximum(x - node.max_list, 0)
        e_sum = e_min + e_max
        # Epsilon keeps the exponential rate positive when x is inside the box.
        rate = np.sum(e_sum) + 1e-9
        E = np.random.exponential(1.0 / rate)
        if node.get_parent_tau() + E < node.tau:
            # A new split is introduced above `node`: sample the split
            # dimension proportionally to the overhang e_sum, then a split
            # location in the gap between x and the existing box.
            e_sample = np.random.rand() * np.sum(e_sum)
            delta = (e_sum.cumsum() > e_sample).argmax()
            if x[delta] > node.min_list[delta]:
                xi = np.random.uniform(node.min_list[delta], x[delta])
            else:
                xi = np.random.uniform(x[delta], node.max_list[delta])
            parent = Node(
                min_list=np.minimum(node.min_list, x),
                max_list=np.maximum(node.max_list, x),
                is_leaf=False,
                stat=self.stat_factory.create(),
                tau=node.get_parent_tau() + E,
                parent=node.parent,
                delta=delta,
                xi=xi,
            )
            sibling = self.create_leaf(x, label, parent=parent)
            if x[parent.delta] <= parent.xi:
                parent.left = sibling
                parent.right = node
            else:
                parent.left = node
                parent.right = sibling
            node.parent = parent
            parent.update_internal()
            return parent
        else:
            # No new split: absorb x into this node's box, then recurse
            # into the matching child (or update the leaf statistics).
            node.min_list = np.minimum(x, node.min_list)
            node.max_list = np.maximum(x, node.max_list)
            if not node.is_leaf:
                if x[node.delta] <= node.xi:
                    node.left = self.extend_mondrian_block(node.left, x, label)
                else:
                    node.right = self.extend_mondrian_block(node.right, x, label)
                node.update_internal()
            else:
                node.update_leaf(x, label)
            return node

    def partial_fit(self, X, y):
        """Grow the tree online, one sample at a time."""
        for x, label in zip(X, y):
            self.classes |= {label}
            if self.root is None:
                self.root = self.create_leaf(x, label, parent=None)
            else:
                self.root = self.extend_mondrian_block(self.root, x, label)

    def fit(self, X, y):
        """Discard any existing tree and fit from scratch."""
        self.root = None
        self.partial_fit(X, y)

    def _predict(self, x, node, p_not_separated_yet):
        """Recursively mix per-node predictions, weighting each node by the
        probability that x would have been split off at that node."""
        d = node.tau - node.get_parent_tau()
        eta = np.sum(np.maximum(x - node.max_list, 0) + np.maximum(node.min_list - x, 0))
        # Probability that a split separating x occurs within this node's lifetime.
        p = 1.0 - np.exp(-d * eta)
        result = node.stat.create_result(x, p_not_separated_yet * p)
        if node.is_leaf:
            # Remaining mass goes to the leaf's own prediction.
            w = p_not_separated_yet * (1.0 - p)
            return result.merge(node.stat.create_result(x, w))
        if x[node.delta] <= node.xi:
            child_result = self._predict(x, node.left, p_not_separated_yet * (1.0 - p))
        else:
            child_result = self._predict(x, node.right, p_not_separated_yet * (1.0 - p))
        return result.merge(child_result)

    def get_params(self, deep):
        # scikit-learn style; the base tree exposes no hyper-parameters.
        return {}

class MondrianTreeRegressor(MondrianTree):
    """A single Mondrian tree for regression.

    NOTE(review): the paste declared `object` as the base class while
    calling `MondrianTree.init(self)` — without inheriting MondrianTree
    the instance has no fit/partial_fit/_predict at all, so predict()
    would raise AttributeError. Restored the base class and the
    Markdown-garbled `__init__`.
    """

    def __init__(self):
        MondrianTree.__init__(self)
        # Each node accumulates a running mean of its targets.
        self.stat_factory = RegressorFactory()

    def predict(self, X):
        """Return one smoothed prediction per row of X."""
        res = []
        for x in X:
            predicted = self._predict(x, self.root, 1.0).get()
            res.append(predicted)
        return np.array(res)

class RegressorResult(object):
    """Weighted partial prediction accumulated along a root-to-leaf path.

    NOTE(review): the pasted `def init` was a Markdown-garbled `__init__`;
    restored here.
    """

    def __init__(self, avg):
        self.avg = avg

    def merge(self, r):
        # Contributions are already weighted, so merging is plain addition.
        return RegressorResult(self.avg + r.avg)

    def get(self):
        return self.avg

class Regressor(object):
    """Running mean of the target values seen at one tree node.

    NOTE(review): the pasted `def init` was a Markdown-garbled `__init__`;
    restored here, and the spurious parentheses around the augmented
    assignments removed.
    """

    def __init__(self):
        self.sum = 0
        self.count = 0

    def add(self, x, y):
        # x (the feature vector) is unused; only the target feeds the mean.
        self.sum += y
        self.count += 1

    def merge(self, r):
        """Return a new Regressor combining this node's totals with r's."""
        res = Regressor()
        res.sum = self.sum + r.sum
        res.count = self.count + r.count
        return res

    def predict(self, x):
        # An empty node predicts 0 rather than dividing by zero.
        if self.count == 0:
            return 0
        return self.sum / self.count

    def create_result(self, x, w):
        # Wrap the mean, scaled by the caller-supplied path weight w.
        return RegressorResult(self.predict(x) * w)
@yubin-park
Copy link
Owner

@ryan102590 interesting idea. I guess you are referring to this repo: https://github.com/nel215/mondrianforest ?

@ryan102590
Copy link
Author

@yubin-park
correct! I just moved all of the classes into one file so that it would be easier for me to look at and learn. It's an interesting concept because of the pausing component of the algorithm when there isn't new information, and its efficient partial_fit capabilities.

@ryan102590
Copy link
Author

overall I'm hoping to implement a partial_fit stochastic boosting descent version and trying to find the most efficient way. I have a version now but it's very slow.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants