From ac70a0f914d9d1ae9269125d58d4101b0044275f Mon Sep 17 00:00:00 2001
From: Farid Rashidi <farid.rsh@gmail.com>
Date: Sat, 23 Oct 2021 18:11:20 -0400
Subject: [PATCH 01/11] [skip ci] refinement (#83)

---
 MANIFEST.in                                |  2 +-
 README.rst                                 |  6 +--
 tests/test_commands.py                     | 45 ++++++++++------------
 tests/test_logging.py                      |  1 +
 tests/test_pl.py                           |  6 +++
 tests/test_tl_scores.py                    | 14 +++++--
 tests/test_tl_solvers.py                   | 16 ++++++++
 trisicell/commands/_partf.py               |  4 +-
 trisicell/datasets/__init__.py             |  5 ++-
 trisicell/datasets/_simulate.py            | 19 ++++++++-
 trisicell/{tl/score => external}/_mp3.py   |  0
 trisicell/io/_genotype.py                  |  9 -----
 trisicell/tl/score/_others.py              |  2 +-
 trisicell/tl/solver/booster/_subsamples.py |  2 +-
 trisicell/ul/_trees.py                     |  4 +-
 trisicell/ul/_utils.py                     |  2 +-
 16 files changed, 86 insertions(+), 51 deletions(-)
 rename trisicell/{tl/score => external}/_mp3.py (100%)

diff --git a/MANIFEST.in b/MANIFEST.in
index 65d7b6a..f106fa0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,7 @@
 prune docs
 prune .scripts
 prune .github
+include LICENSE
 include requirements.txt
 include docs/requirements.txt
-include LICENSE
 include trisicell/datasets/*
diff --git a/README.rst b/README.rst
index 453cf1d..2d30037 100644
--- a/README.rst
+++ b/README.rst
@@ -57,7 +57,7 @@ Trisicell was developed in collaboration between the `Cancer Data Science Labora
     :target: https://github.com/faridrashidi/trisicell
     :alt: Stars
 
-.. |Contributions Welcome| image:: https://img.shields.io/static/v1.svg?label=contributions&message=welcome&color=0059b3&logo=handshake&logoColor=FFFFFF&style=flat-square
+.. |Contributions Welcome| image:: https://img.shields.io/static/v1.svg?label=contributions&message=welcome&color=blue&logo=handshake&logoColor=FFFFFF&style=flat-square
     :target: https://github.com/faridrashidi/trisicell/blob/master/CONTRIBUTING.rst
     :alt: Contributions Welcome
 
@@ -73,8 +73,8 @@ Trisicell was developed in collaboration between the `Cancer Data Science Labora
     :target: https://trisicell.readthedocs.io
     :alt: Docs Status
 
-.. |Pre-commit| image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white&style=flat-square
-    :target: https://github.com/pre-commit/pre-commit
+.. |Pre-commit| image:: https://img.shields.io/badge/pre--commit.ci-passing-brightgreen?logo=pre-commit&logoColor=white&style=flat-square
+    :target: https://results.pre-commit.ci/latest/github/faridrashidi/trisicell/master
     :alt: Pre-commit
 
 .. |Code Style| image:: https://img.shields.io/badge/code%20style-black-000000.svg?logo=visualstudiocode&logoColor=FFFFFF&style=flat-square
diff --git a/tests/test_commands.py b/tests/test_commands.py
index bb2b4a1..567e27e 100644
--- a/tests/test_commands.py
+++ b/tests/test_commands.py
@@ -111,21 +111,36 @@ def test_mcalling(self):
         )
         assert result.exit_code == 0
 
-    @skip_graphviz
-    def test_cf2tree(self):
+    def test_search(self):
+        result = self.runner.invoke(
+            cli,
+            ["search", tsc.ul.get_file("trisicell.datasets/test/test.tsv"), "-p 2"],
+        )
+        assert result.exit_code == 0
+
+    def test_score(self):
         result = self.runner.invoke(
             cli,
             [
-                "cf2tree",
-                tsc.ul.get_file("trisicell.datasets/test/test.phiscsb.CFMatrix"),
+                "score",
+                tsc.ul.get_file(
+                    "trisicell.datasets/test/fp_0-fn_0-na_0.ground.CFMatrix"
+                ),
+                tsc.ul.get_file(
+                    "trisicell.datasets/test/fp_1-fn_0.1-na_0.bnb.CFMatrix"
+                ),
             ],
         )
         assert result.exit_code == 0
 
-    def test_search(self):
+    @skip_graphviz
+    def test_cf2tree(self):
         result = self.runner.invoke(
             cli,
-            ["search", tsc.ul.get_file("trisicell.datasets/test/test.tsv"), "-p 2"],
+            [
+                "cf2tree",
+                tsc.ul.get_file("trisicell.datasets/test/test.phiscsb.CFMatrix"),
+            ],
         )
         assert result.exit_code == 0
 
@@ -144,24 +159,6 @@ def test_partf(self):
         )
         assert result.exit_code == 0
 
-    @pytest.mark.skip(
-        reason="Using MLTD in two tests is taking so long in test_scores!"
-    )
-    def test_score(self):
-        result = self.runner.invoke(
-            cli,
-            [
-                "score",
-                tsc.ul.get_file(
-                    "trisicell.datasets/test/fp_0-fn_0-na_0.ground.CFMatrix"
-                ),
-                tsc.ul.get_file(
-                    "trisicell.datasets/test/fp_1-fn_0.1-na_0.bnb.CFMatrix"
-                ),
-            ],
-        )
-        assert result.exit_code == 0
-
     @pytest.mark.skip(reason="PyTest issue with multithreading!")
     def test_booster(self):
         result = self.runner.invoke(
diff --git a/tests/test_logging.py b/tests/test_logging.py
index 966fe33..5ee38dd 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -8,4 +8,5 @@ def test_logging(self):
         tsc.logg.hint("HINT")
         tsc.logg.info("INFO")
         tsc.logg.warn("WARN")
+        tsc.logg.info("TIME", time=True, color="red")
         assert True
diff --git a/tests/test_pl.py b/tests/test_pl.py
index 613aec1..4cfd3be 100644
--- a/tests/test_pl.py
+++ b/tests/test_pl.py
@@ -48,6 +48,12 @@ def test_clonal_tree_with_coloring(self):
             cell_info=adata.obs,
             color_attr="group_color",
         )
+        tsc.pl.clonal_tree(
+            tree,
+            muts_as_number=True,
+            cells_as_number=True,
+            show_id=True,
+        )
         assert True
 
     @skip_rpy2
diff --git a/tests/test_tl_scores.py b/tests/test_tl_scores.py
index 2b5d2e0..aac5064 100644
--- a/tests/test_tl_scores.py
+++ b/tests/test_tl_scores.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import trisicell as tsc
 
@@ -18,9 +19,9 @@ def test_dl(self):
         dl = tsc.tl.dl(self.grnd, self.sol)
         assert np.abs(dl - 0.9880) < 0.0001
 
-    def test_mltd(self):
-        mltd = tsc.tl.mltd(self.grnd, self.sol)
-        assert np.abs(mltd["normalized_similarity"] - 0.7800) < 0.0001
+    def test_cc(self):
+        tsc.tl.cc(self.grnd, self.sol)
+        assert True
 
     def test_tpted(self):
         tpted = tsc.tl.tpted(self.grnd, self.sol)
@@ -41,3 +42,10 @@ def test_mp3(self):
     def test_rf(self):
         rf = tsc.tl.rf(self.grnd, self.sol)
         assert np.abs(rf - 0.4864) < 0.0001
+
+    @pytest.mark.skip(
+        reason="Using MLTD in two tests is taking so long in test_scores!"
+    )
+    def test_mltd(self):
+        mltd = tsc.tl.mltd(self.grnd, self.sol)
+        assert np.abs(mltd["normalized_similarity"] - 0.7800) < 0.0001
diff --git a/tests/test_tl_solvers.py b/tests/test_tl_solvers.py
index 23d635d..de806ca 100644
--- a/tests/test_tl_solvers.py
+++ b/tests/test_tl_solvers.py
@@ -130,3 +130,19 @@ def test_booster_scite(self):
             dep_weight=5,
         )
         assert tsc.ul.is_conflict_free_gusfield(df_out)
+
+    def test_booster_scistree_on_cells(self):
+        df_out = tsc.tl.booster(
+            self.df_in,
+            alpha=0.0000001,
+            beta=0.1,
+            solver="ScisTree",
+            sample_on="cells",
+            sample_size=10,
+            n_samples=20,
+            begin_index=0,
+            n_jobs=1,
+            n_iterations=10000,
+            dep_weight=5,
+        )
+        assert tsc.ul.is_conflict_free_gusfield(df_out)
diff --git a/trisicell/commands/_partf.py b/trisicell/commands/_partf.py
index f4eb5de..5dfb973 100644
--- a/trisicell/commands/_partf.py
+++ b/trisicell/commands/_partf.py
@@ -71,10 +71,10 @@ def partf(genotype_file, alpha, beta, n_samples, n_threads):
     subtrees_list = []
     tree_our_prob_list = []
 
-    def run(i):
+    def run():
         return draw_sample_clt(P, False, c=1, coef=10)
 
-    output = Parallel(n_jobs=n_threads)(delayed(run)(i) for i in range(0, n_samples))
+    output = Parallel(n_jobs=n_threads)(delayed(run)() for i in range(0, n_samples))
 
     for edges, subtrees, prior_prob in output:
         edges_list.append(edges)
diff --git a/trisicell/datasets/__init__.py b/trisicell/datasets/__init__.py
index 6bf64d0..180c7d7 100644
--- a/trisicell/datasets/__init__.py
+++ b/trisicell/datasets/__init__.py
@@ -26,7 +26,7 @@
     test,
     tnbc,
 )
-from trisicell.datasets._simulate import add_noise, simulate
+from trisicell.datasets._simulate import add_doublets, add_noise, simulate
 
 __all__ = (
     acute_lymphocytic_leukemia1,
@@ -53,6 +53,7 @@
     renal_cell_carcinoma,
     test,
     tnbc,
-    add_noise,
     simulate,
+    add_noise,
+    add_doublets,
 )
diff --git a/trisicell/datasets/_simulate.py b/trisicell/datasets/_simulate.py
index 6fa9951..a1df593 100644
--- a/trisicell/datasets/_simulate.py
+++ b/trisicell/datasets/_simulate.py
@@ -132,9 +132,8 @@ def toss(p):
                         data2[i][j] = data[i][j]
                 else:
                     tsc.logg.error("Wrong Input")
-                    sys.exit(2)
 
-    tsc.logg.info(f"FNs={countFN}, FPs={countFP}, NAs={countNA}")
+    # tsc.logg.info(f"FNs={countFN}, FPs={countFP}, NAs={countNA}")
 
     df_out = pd.DataFrame(data2)
     df_out.columns = df_in.columns
@@ -142,3 +141,19 @@ def toss(p):
     df_out.index.name = "cellIDxmutID"
 
     return df_out
+
+
+def add_doublets(df_ground, df_noisy, alpha, beta, missing, doublet):
+    df_doublet = df_noisy.copy()
+    doublet_cells = []
+    for _ in range(int(doublet * df_ground.shape[0])):
+        r1 = np.random.choice(df_ground.index, replace=False, size=1)
+        while r1 in doublet_cells:
+            r1 = np.random.choice(df_ground.index, replace=False, size=1)
+        doublet_cells.append(r1)
+        r2 = np.random.choice(df_ground.index, replace=False, size=1)
+        df_doublet.loc[r1] = 1 * np.logical_or(df_ground.loc[r1], df_ground.loc[r2])
+        df_doublet.loc[r1] = tsc.datasets.add_noise(
+            df_doublet.loc[r1], alpha=alpha, beta=beta, missing=missing
+        )
+    return df_doublet
diff --git a/trisicell/tl/score/_mp3.py b/trisicell/external/_mp3.py
similarity index 100%
rename from trisicell/tl/score/_mp3.py
rename to trisicell/external/_mp3.py
diff --git a/trisicell/io/_genotype.py b/trisicell/io/_genotype.py
index 51f53e5..f4dad3a 100644
--- a/trisicell/io/_genotype.py
+++ b/trisicell/io/_genotype.py
@@ -79,15 +79,6 @@ def _read_nwk(filepath):
             cn = node2id[c]
             G.add_edge(pn, cn)
 
-    root = [n for n in G.nodes if G.in_degree(n) == 0][0]
-    if G.out_degree(root) == 3:
-        child = list(G.successors(root))[1]
-        G.add_node(i, label="root")
-        G.remove_edge(root, child)
-        G.add_edge(i, child)
-        G.add_edge(i, root)
-        G.nodes[root]["label"] = ""
-
     i = 0
     for e, u, _ in G.edges.data("label"):
         G.edges[(e, u)]["label"] = f"m{i}"
diff --git a/trisicell/tl/score/_others.py b/trisicell/tl/score/_others.py
index da1822d..abdbe84 100644
--- a/trisicell/tl/score/_others.py
+++ b/trisicell/tl/score/_others.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 import trisicell as tsc
-from trisicell.tl.score._mp3 import build_tree, similarity
+from trisicell.external._mp3 import build_tree, similarity
 from trisicell.ul._trees import _to_newick
 
 
diff --git a/trisicell/tl/solver/booster/_subsamples.py b/trisicell/tl/solver/booster/_subsamples.py
index baa9fea..e796125 100644
--- a/trisicell/tl/solver/booster/_subsamples.py
+++ b/trisicell/tl/solver/booster/_subsamples.py
@@ -43,7 +43,7 @@ def run(i):
             dfn = dfn[dfn.columns[x]]
             if dfn.shape[1] < 2:
                 return None
-            dfo, _ = tsc.tl.scistree(dfn, alpha, beta, False, experiment=True)
+            dfo, _ = tsc.tl.scistree(dfn, alpha, beta, experiment=True)
             dfo.to_csv(f"{tmpdir}/{i}.CFMatrix", sep="\t")
 
     with tsc.ul.tqdm_joblib(
diff --git a/trisicell/ul/_trees.py b/trisicell/ul/_trees.py
index dfb7b69..1d7c18a 100644
--- a/trisicell/ul/_trees.py
+++ b/trisicell/ul/_trees.py
@@ -101,8 +101,8 @@ def _contains(col1, col2):
                 tree.graph["splitter_mut"]
             )
         untilnow_cell = df.loc[
-            (df[set(untilnow_mut)] == 1).all(axis=1)
-            & (df[{x for x in df.columns if x not in untilnow_mut}] == 0).all(axis=1)
+            (df[untilnow_mut] == 1).all(axis=1)
+            & (df[[x for x in df.columns if x not in untilnow_mut]] == 0).all(axis=1)
         ].index
         if len(untilnow_cell) > 0:
             clusters[node] = f"{tree.graph['splitter_cell'].join(untilnow_cell)}"
diff --git a/trisicell/ul/_utils.py b/trisicell/ul/_utils.py
index 064003b..63e3b1c 100644
--- a/trisicell/ul/_utils.py
+++ b/trisicell/ul/_utils.py
@@ -122,7 +122,7 @@ def get_param(filename):
     data["fp"] = float(basename.split("-")[7].split("_")[1])
     data["fn"] = float(basename.split("-")[8].split("_")[1])
     data["na"] = float(basename.split("-")[9].split("_")[1])
-    data["d"] = int(basename.split("-")[10].split("_")[1])
+    data["d"] = float(basename.split("-")[10].split("_")[1])
     last = basename.split("-")[11]
     if "." in last:
         data["l"] = int(last.split(".")[0].split("_")[1])

From a7cc295c478f6350e868987f9ac7861275ed56af Mon Sep 17 00:00:00 2001
From: Farid Rashidi <farid.rsh@gmail.com>
Date: Sat, 23 Oct 2021 23:22:00 -0400
Subject: [PATCH 02/11] [skip ci] add siclonefit and sciphi (#84)

---
 trisicell/datasets/_simulate.py    |  2 +-
 trisicell/tl/__init__.py           |  2 ++
 trisicell/tl/solver/__init__.py    |  1 +
 trisicell/tl/solver/_sciphi.py     | 52 ++++++++++++++++++++++++++++++
 trisicell/tl/solver/_siclonefit.py | 27 ++++++++--------
 5 files changed, 69 insertions(+), 15 deletions(-)
 create mode 100644 trisicell/tl/solver/_sciphi.py

diff --git a/trisicell/datasets/_simulate.py b/trisicell/datasets/_simulate.py
index a1df593..ad9f076 100644
--- a/trisicell/datasets/_simulate.py
+++ b/trisicell/datasets/_simulate.py
@@ -60,7 +60,7 @@ def simulate(n_cells=10, n_muts=10, n_clones=3, alpha=0.00001, beta=0.1, missing
     with ro.conversion.localconverter(ro.default_converter + pandas2ri.converter):
         dat = ro.conversion.rpy2py(dat.rx2("D"))
     dat[dat == 2] = 3
-    df = pd.DataFrame(dat, dtype=int)
+    df = pd.DataFrame(dat.T, dtype=int)
     df.columns = [f"mut{x}" for x in df.columns]
     df.index = [f"cell{x}" for x in df.index]
 
diff --git a/trisicell/tl/__init__.py b/trisicell/tl/__init__.py
index 05b7447..44a2e17 100644
--- a/trisicell/tl/__init__.py
+++ b/trisicell/tl/__init__.py
@@ -22,6 +22,7 @@
     phiscsi_bulk,
     rscistree,
     sbm,
+    sciphi,
     scistree,
     scite,
     siclonefit,
@@ -63,4 +64,5 @@
     rf,
     sphyr,
     grmt,
+    sciphi,
 )
diff --git a/trisicell/tl/solver/__init__.py b/trisicell/tl/solver/__init__.py
index 891ac97..7202db8 100644
--- a/trisicell/tl/solver/__init__.py
+++ b/trisicell/tl/solver/__init__.py
@@ -11,6 +11,7 @@
     phiscsi_bulk,
 )
 from trisicell.tl.solver._sbm import sbm
+from trisicell.tl.solver._sciphi import sciphi
 from trisicell.tl.solver._scistree import iscistree, rscistree, scistree
 from trisicell.tl.solver._scite import infscite, scite
 from trisicell.tl.solver._siclonefit import siclonefit
diff --git a/trisicell/tl/solver/_sciphi.py b/trisicell/tl/solver/_sciphi.py
new file mode 100644
index 0000000..be894e8
--- /dev/null
+++ b/trisicell/tl/solver/_sciphi.py
@@ -0,0 +1,52 @@
+import os
+import time
+
+import trisicell as tsc
+
+
+def sciphi(df_input):
+    # TODO: implement
+    executable = tsc.ul.executable("sciphi", "SCIPhI")
+
+    tsc.logg.info("running SCIPhI with")
+
+    # tmpdir = tsc.ul.tmpdirsys(suffix=".sciphi")
+    tmpdir = "test"
+    tsc.ul.cleanup(tmpdir)
+    tsc.ul.mkdir(tmpdir)
+
+    matrix_I = df_input.values
+    with open(f"{tmpdir}/sciphi.mpileup", "w") as fout:
+        for j in range(matrix_I.shape[1]):
+            line = f"seq1\t{(j+1)*100}\tA"
+            r = q = ""
+            for i in range(matrix_I.shape[0]):
+                if matrix_I[i, j] == 0:
+                    r = "."
+                elif matrix_I[i, j] == 1:
+                    r = "T"
+                elif matrix_I[i, j] == 3:
+                    r = "N"
+                q = "<"
+                line = f"{line}\t1\t{r}\t{q}"
+            fout.write(line + "\n")
+    with open(f"{tmpdir}/sciphi.cellnames", "w") as fout:
+        for i in range(matrix_I.shape[0]):
+            fout.write(f"{df_input.index[i]}\tCT\n")
+
+    cmd = (
+        f"{executable} "
+        f"-o {tmpdir}/out "
+        f"--in {tmpdir}/sciphi.cellnames "
+        "--seed 42 "
+        f"{tmpdir}/sciphi.mpileup "
+        f"> {tmpdir}/sciphi.log"
+    )
+
+    s_time = time.time()
+    os.system(cmd)
+    e_time = time.time()
+    running_time = e_time - s_time
+    running_time
+
+    return None
diff --git a/trisicell/tl/solver/_siclonefit.py b/trisicell/tl/solver/_siclonefit.py
index 3c5fe28..d00aa8a 100644
--- a/trisicell/tl/solver/_siclonefit.py
+++ b/trisicell/tl/solver/_siclonefit.py
@@ -32,34 +32,33 @@ def siclonefit(df_input, alpha, beta, n_iters):
         f"-ipMat {tmpdir.name}/siclonefit.input "
         f"-fp {alpha} "
         f"-fn {beta} "
-        "-df 0 "
+        # "-df 0 "
         f"-missing {np.sum(I_mtr == 3)/(I_mtr.size)} "
-        "-f 3 "
-        "-recurProb 0 "
-        "-delProb 0 "
-        "-LOHProb 0 "
-        f"-iter {n_iters} "
+        # "-f 3 "
+        # "-recurProb 0 "
+        # "-delProb 0 "
+        # "-LOHProb 0 "
+        # f"-iter {n_iters} "
         f"-cellNames {tmpdir.name}/siclonefit.cellnames "
         f"-geneNames {tmpdir.name}/siclonefit.genenames "
+        # "-r "
+        # "-burnin "
+        # "-printIter "
+        # "-treeIter "
+        # "-doublet "
         f"-outDir {tmpdir.name} > {tmpdir.name}/siclonefit.log"
     )
-    # check the following parameters
-    # -burnin
-    # -printIter
-    # -treeIter
-    # -doublet
     s_time = time.time()
     os.system(cmd)
     e_time = time.time()
     running_time = e_time - s_time
 
-    df = pd.read_csv(
-        f"{tmpdir.name}/20p_missing_samples/best/best_MAP_predicted_genotype.txt",
+    df_output = pd.read_csv(
+        f"{tmpdir.name}/samples/best/best_MAP_predicted_genotype.txt",
         sep=" ",
         header=None,
         index_col=0,
     ).T
-    df_output = pd.DataFrame(df.values)
     df_output.columns = df_input.columns
     df_output.index = df_input.index
     df_output.index.name = "cellIDxmutID"

From d0360a9d75df1b0c5963f4b72117abdb112ad59c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 28 Oct 2021 18:15:48 -0400
Subject: [PATCH 03/11] [skip ci][pre-commit.ci] pre-commit autoupdate (#86)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/asottile/yesqa: v1.2.3 → v1.3.0](https://github.com/asottile/yesqa/compare/v1.2.3...v1.3.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index dbc90d3..6525bb2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -85,7 +85,7 @@ repos:
         additional_dependencies: [flake8-tidy-imports, flake8-docstrings, flake8-comprehensions, flake8-bugbear, flake8-blind-except]
         args: [--max-line-length=88, --config=setup.cfg]
 -   repo: https://github.com/asottile/yesqa
-    rev: v1.2.3
+    rev: v1.3.0
     hooks:
     -   id: yesqa
         additional_dependencies: [flake8-tidy-imports, flake8-docstrings, flake8-comprehensions, flake8-bugbear, flake8-blind-except]

From aeba1b905827c59a37bc1686b9159e420d6bb4d8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Nov 2021 15:40:57 -0400
Subject: [PATCH 04/11] [skip ci][pre-commit.ci] pre-commit autoupdate (#87)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 21.9b0 → 21.10b0](https://github.com/psf/black/compare/21.9b0...21.10b0)
- [github.com/asottile/yesqa: v1.2.3 → v1.3.0](https://github.com/asottile/yesqa/compare/v1.2.3...v1.3.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6525bb2..111d93b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_stages:
 minimum_pre_commit_version: 2.9.3
 repos:
 -   repo: https://github.com/psf/black
-    rev: 21.9b0
+    rev: 21.10b0
     hooks:
     -   id: black
         additional_dependencies: [toml]

From 06a910139cfefc0dc662e08f05d3921fd03043d1 Mon Sep 17 00:00:00 2001
From: Farid Rashidi <farid.rsh@gmail.com>
Date: Wed, 3 Nov 2021 19:09:31 -0400
Subject: [PATCH 05/11] [skip ci] update docs (#85)

---
 docs/source/about.rst                 |  4 +-
 docs/source/api.rst                   | 70 +++++----------------------
 docs/source/cli.rst                   | 34 ++++---------
 docs/source/conf.py                   |  1 -
 examples/comparison/README.rst        |  4 --
 examples/comparison/compute_scores.py | 49 -------------------
 6 files changed, 23 insertions(+), 139 deletions(-)
 delete mode 100644 examples/comparison/README.rst
 delete mode 100644 examples/comparison/compute_scores.py

diff --git a/docs/source/about.rst b/docs/source/about.rst
index 79a7160..637a672 100644
--- a/docs/source/about.rst
+++ b/docs/source/about.rst
@@ -59,10 +59,10 @@ observed data.
 
 There are several techniques and methods to remove the noise/conflicts from
 the input genotype matrix. They are mostly based on Integer Linear Programming
-(ILP), Constraint Satisfaction Prgramming (CSP), Markov chain Monte Carlo (MCMC)
+(ILP), Constraint Satisfaction Programming (CSP), Markov chain Monte Carlo (MCMC)
 sampling and Neighbor Joining (NJ). For more details, we highly recommend to
 read our `Trisicell <https://doi.org/10.1101/2021.03.26.437185>`_ and
-`review <https://doi.org/10.1101/2020.07.15.204081>`_ papers about building
+`review <https://doi.org/10.1089/cmb.2020.0595>`_ papers about building
 tumor progression tree by exploring the space of binary matrices.
 
 Trisicell Components
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 28cfbe4..14ef415 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -13,41 +13,6 @@ After mutation calling and building the input data via our suggested
 :ref:`mutation calling pipeline <caller>`.
 
 
-Datasets (datasets)
--------------------
-This module offers a bunch of functions for simulating data.
-
-.. module:: trisicell.datasets
-.. currentmodule:: trisicell
-.. autosummary::
-    :toctree: .
-
-    datasets.example
-    datasets.simulate
-    datasets.add_noise
-    datasets.melanoma20
-    datasets.colorectal1
-    datasets.colorectal2
-    datasets.colorectal3
-    datasets.acute_lymphocytic_leukemia1
-    datasets.acute_lymphocytic_leukemia2
-    datasets.acute_lymphocytic_leukemia3
-    datasets.acute_lymphocytic_leukemia4
-    datasets.acute_lymphocytic_leukemia5
-    datasets.acute_lymphocytic_leukemia6
-    datasets.high_grade_serous_ovarian_cancer1
-    datasets.high_grade_serous_ovarian_cancer2
-    datasets.high_grade_serous_ovarian_cancer3
-    datasets.high_grade_serous_ovarian_cancer_3celllines
-    datasets.myeloproliferative_neoplasms18
-    datasets.myeloproliferative_neoplasms78
-    datasets.myeloproliferative_neoplasms712
-    datasets.renal_cell_carcinoma
-    datasets.muscle_invasive_bladder
-    datasets.erbc
-    datasets.tnbc
-
-
 Read/Write (io)
 ---------------
 This module offers a bunch of functions for reading and writing of the data.
@@ -75,7 +40,6 @@ data.
     pp.remove_cell_by_list
     pp.filter_mut_reference_must_present_in_at_least
     pp.filter_mut_mutant_must_present_in_at_least
-    pp.bifiltering
     pp.consensus_combine
 
 
@@ -93,12 +57,6 @@ and calculating the probability of mutations seeding particular cells.
     :toctree: .
 
     tl.booster
-    tl.scite
-    tl.phiscsb
-    tl.scistree
-    tl.onconem
-    tl.huntress
-
 
 **Partition function calculation (Trisicell-PartF)**
 
@@ -114,20 +72,6 @@ and calculating the probability of mutations seeding particular cells.
 
     tl.consensus
 
-**For comparing two phylogenetic trees**
-
-.. autosummary::
-    :toctree: .
-
-    tl.ad
-    tl.dl
-    tl.mltd
-    tl.tpted
-    tl.caset
-    tl.disc
-    tl.mp3
-    tl.rf
-
 
 Plotting (pl)
 -------------
@@ -154,5 +98,15 @@ This module offers a bunch of utility functions.
     ul.to_tree
     ul.to_cfmatrix
     ul.to_mtree
-    ul.hclustering
-    ul.is_conflict_free_gusfield
+
+
+Datasets (datasets)
+-------------------
+This module offers a bunch of functions for simulating data.
+
+.. module:: trisicell.datasets
+.. currentmodule:: trisicell
+.. autosummary::
+    :toctree: .
+
+    datasets.example
diff --git a/docs/source/cli.rst b/docs/source/cli.rst
index 0d97c29..b68855f 100644
--- a/docs/source/cli.rst
+++ b/docs/source/cli.rst
@@ -13,7 +13,9 @@ following output:
 
     Usage: trisicell [OPTIONS] COMMAND [ARGS]...
 
-      Scalable intratumor heterogeneity inference and validation from single-cell data
+      Trisicell.
+
+      Scalable intratumor heterogeneity inference and validation from single-cell data.
 
     Options:
       --version  Show the version and exit.
@@ -21,18 +23,9 @@ following output:
 
     Commands:
       mcalling   Mutation calling.
-      score      Calculate scores between two trees.
-      scistree   Run ScisTree.
-      scite      Run SCITE.
-      booster    Run Booster.
-      phiscsb    Run PhISCS (CSP version).
-      phiscsi    Run PhISCS (ILP version).
-      bnb        Run PhISCS-BnB.
-      huntress   Run HUNTRESS.
-      cf2newick  Convert conflict-free to newick file.
-      cf2tree    Convert conflict-free to clonal tree.
-      consensus  Calculate consensus betweeen two trees.
-      search     Grid search for all parameters.
+      booster    Boost available tree reconstruction tool (Trisicell-Boost).
+      partf      Get samples or calculate for PartF.
+      consensus  Build consensus tree between two phylogenetic trees (Trisicell-Cons).
 
 
 ``mcalling`` - Run Mutation Calling
@@ -53,19 +46,10 @@ following output:
     :nested: full
 
 
-``scite`` - Run SCITE
----------------------
-
-.. click:: trisicell.commands.trisicell:cli
-    :prog: trisicell
-    :commands: scite
-    :nested: full
-
-
-``score`` - Calculating Scores
-------------------------------
+``consensus`` - Run Consensus
+-----------------------------
 
 .. click:: trisicell.commands.trisicell:cli
     :prog: trisicell
-    :commands: score
+    :commands: consensus
     :nested: full
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 80b0ddd..2f0d8d6 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -206,7 +206,6 @@ def reset_matplotlib(gallery_conf, fname):
     "subsection_order": ExplicitOrder(
         [
             rel_example_dir / "reconstruction",  # really must be relative
-            rel_example_dir / "comparison",
         ]
     ),
     "reference_url": {
diff --git a/examples/comparison/README.rst b/examples/comparison/README.rst
deleted file mode 100644
index 031b9e2..0000000
--- a/examples/comparison/README.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Comparison
-----------
-
-Below is a gallery of examples for comparing two phylogenetic trees.
diff --git a/examples/comparison/compute_scores.py b/examples/comparison/compute_scores.py
deleted file mode 100644
index babfdf5..0000000
--- a/examples/comparison/compute_scores.py
+++ /dev/null
@@ -1,49 +0,0 @@
-"""
-Comparing scores for two phylogenetic trees
--------------------------------------------
-
-This example shows how to compare/measure two inferred genotype data (trees).
-"""
-
-import trisicell as tsc
-
-# %%
-# First, we load two binary test single-cell genotype data.
-grnd = tsc.io.read(
-    tsc.ul.get_file("trisicell.datasets/test/fp_0-fn_0-na_0.ground.CFMatrix")
-)
-sol = tsc.io.read(
-    tsc.ul.get_file("trisicell.datasets/test/fp_1-fn_0.1-na_0.bnb.CFMatrix")
-)
-
-# %%
-# Calculating the ancestor-descendent accuracy.
-tsc.tl.ad(grnd, sol)
-
-# %%
-# Calculating the different-lineage accuracy.
-tsc.tl.dl(grnd, sol)
-
-# %%
-# Calculating the multi-labeled tree dissimilarity measure (MLTD).
-tsc.tl.mltd(grnd, sol)
-
-# %%
-# Calculating the tumor phylogeny tree edit distance measure (TPTED).
-tsc.tl.tpted(grnd, sol)
-
-# %%
-# Calculating the distinctly inherited sets score (DISC).
-tsc.tl.disc(grnd, sol)
-
-# %%
-# Calculating the commonly ancestor sets score (CASet).
-tsc.tl.caset(grnd, sol)
-
-# %%
-# Calculating the Triplet-based similarity score (MP3).
-tsc.tl.mp3(grnd, sol)
-
-# %%
-# Calculating the Robinsold-Foulds similarity score (1 - normalized_distance).
-tsc.tl.rf(grnd, sol)

From 793e83c4cc6be37e37b4e8f368ed111a6406e29d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 8 Nov 2021 15:33:52 -0500
Subject: [PATCH 06/11] [skip ci][pre-commit.ci] pre-commit autoupdate (#88)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 21.9b0 → 21.10b0](https://github.com/psf/black/compare/21.9b0...21.10b0)
- [github.com/PyCQA/isort: 5.9.3 → 5.10.0](https://github.com/PyCQA/isort/compare/5.9.3...5.10.0)
- [github.com/asottile/yesqa: v1.2.3 → v1.3.0](https://github.com/asottile/yesqa/compare/v1.2.3...v1.3.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 111d93b..ba2eb1d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
         additional_dependencies: [toml]
         args: [--line-length=88, --experimental-string-processing]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.9.3
+    rev: 5.10.0
     hooks:
     -   id: isort
         additional_dependencies: [toml]

From ea3bbe9fb7f8da805387921d345422d658f8837a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 15 Nov 2021 16:43:39 -0500
Subject: [PATCH 07/11] [skip ci][pre-commit.ci] pre-commit autoupdate (#89)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 21.9b0 → 21.10b0](https://github.com/psf/black/compare/21.9b0...21.10b0)
- [github.com/PyCQA/isort: 5.9.3 → 5.10.1](https://github.com/PyCQA/isort/compare/5.9.3...5.10.1)
- [github.com/PyCQA/doc8: 0.9.1 → 0.10.1](https://github.com/PyCQA/doc8/compare/0.9.1...0.10.1)
- [github.com/asottile/yesqa: v1.2.3 → v1.3.0](https://github.com/asottile/yesqa/compare/v1.2.3...v1.3.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Farid Rashidi <farid.rsh@gmail.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ba2eb1d..0f1a2b5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
         additional_dependencies: [toml]
         args: [--line-length=88, --experimental-string-processing]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.10.0
+    rev: 5.10.1
     hooks:
     -   id: isort
         additional_dependencies: [toml]
@@ -69,7 +69,7 @@ repos:
     -   id: rst-directive-colons
     -   id: rst-inline-touching-normal
 -   repo: https://github.com/PyCQA/doc8
-    rev: 0.9.1
+    rev: 0.10.1
     hooks:
     -   id: doc8
         args: [--max-line-length=88]

From 399578fb87999ea2d25611ac23c4d69dc6232946 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 22 Nov 2021 16:29:12 -0500
Subject: [PATCH 08/11] [skip ci][pre-commit.ci] pre-commit autoupdate (#90)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 21.9b0 → 21.11b1](https://github.com/psf/black/compare/21.9b0...21.11b1)
- [github.com/PyCQA/isort: 5.9.3 → 5.10.1](https://github.com/PyCQA/isort/compare/5.9.3...5.10.1)
- [github.com/asottile/blacken-docs: v1.11.0 → v1.12.0](https://github.com/asottile/blacken-docs/compare/v1.11.0...v1.12.0)
- [github.com/asottile/pyupgrade: v2.29.0 → v2.29.1](https://github.com/asottile/pyupgrade/compare/v2.29.0...v2.29.1)
- [github.com/PyCQA/doc8: 0.9.1 → 0.10.1](https://github.com/PyCQA/doc8/compare/0.9.1...0.10.1)
- [github.com/asottile/yesqa: v1.2.3 → v1.3.0](https://github.com/asottile/yesqa/compare/v1.2.3...v1.3.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Farid Rashidi <farid.rsh@gmail.com>
---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0f1a2b5..adee86e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_stages:
 minimum_pre_commit_version: 2.9.3
 repos:
 -   repo: https://github.com/psf/black
-    rev: 21.10b0
+    rev: 21.11b1
     hooks:
     -   id: black
         additional_dependencies: [toml]
@@ -50,12 +50,12 @@ repos:
     hooks:
     -   id: rstcheck
 -   repo: https://github.com/asottile/blacken-docs
-    rev: v1.11.0
+    rev: v1.12.0
     hooks:
     -   id: blacken-docs
         additional_dependencies: [black==20.8b1]
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.29.0
+    rev: v2.29.1
     hooks:
     -   id: pyupgrade
         args: [--py3-plus, --py37-plus]

From 31867deb9965aeb7d99db74436cc1bd773b71b72 Mon Sep 17 00:00:00 2001
From: Farid Rashidi <farid.rsh@gmail.com>
Date: Mon, 22 Nov 2021 21:00:28 -0500
Subject: [PATCH 09/11] [skip ci] add multi threaded scistree (#91)

---
 trisicell/commands/_scistree.py               |    14 +-
 trisicell/external/scistree/BinaryMatrix.cpp  |  2951 +++--
 trisicell/external/scistree/BinaryMatrix.h    |   218 +-
 .../external/scistree/BioSequenceMatrix.cpp   |  1705 +--
 .../external/scistree/BioSequenceMatrix.h     |   164 +-
 .../external/scistree/GenotypeMatrix.cpp      |   524 +-
 trisicell/external/scistree/GenotypeMatrix.h  |    81 +-
 trisicell/external/scistree/MarginalTree.cpp  |  5203 ++++----
 trisicell/external/scistree/MarginalTree.h    |   259 +-
 trisicell/external/scistree/PhylogenyTree.cpp |  1089 +-
 trisicell/external/scistree/PhylogenyTree.h   |    73 +-
 .../external/scistree/PhylogenyTreeBasic.cpp  | 10036 ++++++++--------
 .../external/scistree/PhylogenyTreeBasic.h    |   588 +-
 trisicell/external/scistree/RBT.cpp           |  4548 +++----
 trisicell/external/scistree/RBT.h             |   325 +-
 .../external/scistree/RerootTreeUtils.cpp     |   625 +-
 trisicell/external/scistree/ScistDoublet.cpp  |  2134 ++--
 trisicell/external/scistree/ScistDoublet.hpp  |   184 +-
 .../external/scistree/ScistErrRateInf.cpp     |   162 +-
 .../external/scistree/ScistErrRateInf.hpp     |    36 +-
 trisicell/external/scistree/ScistGenotype.cpp |  2034 ++--
 trisicell/external/scistree/ScistGenotype.hpp |   311 +-
 .../external/scistree/ScistPerfPhyImp.cpp     |  2012 ++--
 .../external/scistree/ScistPerfPhyImp.hpp     |   212 +-
 .../external/scistree/ScistPerfPhyUtils.cpp   |   853 +-
 .../external/scistree/ScistPerfPhyUtils.hpp   |   154 +-
 trisicell/external/scistree/TreeBuilder.cpp   |  2442 ++--
 trisicell/external/scistree/TreeBuilder.h     |   160 +-
 .../external/scistree/UnWeightedGraph.cpp     |  1718 +--
 trisicell/external/scistree/UnWeightedGraph.h |   396 +-
 trisicell/external/scistree/Utils.cpp         |  1977 +--
 trisicell/external/scistree/Utils.h           |   169 +-
 trisicell/external/scistree/Utils2.cpp        |  1256 +-
 trisicell/external/scistree/Utils2.h          |    70 +-
 trisicell/external/scistree/Utils3.cpp        |  3777 +++---
 trisicell/external/scistree/Utils3.h          |   323 +-
 trisicell/external/scistree/Utils4.cpp        |  1394 ++-
 trisicell/external/scistree/Utils4.h          |  2281 ++--
 .../external/scistree/UtilsNumerical.cpp      |   359 +-
 trisicell/external/scistree/UtilsNumerical.h  |   150 +-
 trisicell/external/scistree/ctpl_stl.h        |   282 +
 trisicell/external/scistree/main.cpp          |   659 +-
 trisicell/tl/solver/_scistree.py              |    18 +-
 43 files changed, 28224 insertions(+), 25702 deletions(-)
 create mode 100644 trisicell/external/scistree/ctpl_stl.h

diff --git a/trisicell/commands/_scistree.py b/trisicell/commands/_scistree.py
index 6b9a66a..e772cac 100644
--- a/trisicell/commands/_scistree.py
+++ b/trisicell/commands/_scistree.py
@@ -23,14 +23,22 @@
     required=True,
     type=float,
 )
-def scistree(genotype_file, alpha, beta):
+@click.option(
+    "--n_threads",
+    "-p",
+    default=1,
+    type=int,
+    show_default=True,
+    help="Number of threads.",
+)
+def scistree(genotype_file, alpha, beta, n_threads):
     """ScisTree.
 
     Accurate and efficient cell lineage tree inference from noisy
     single cell data: the maximum likelihood perfect phylogeny approach
     :cite:`ScisTree`.
 
-    trisicell scistree input.SC 0.0001 0.1
+    trisicell scistree input.SC 0.0001 0.1 -p 1
     """
 
     outfile = os.path.splitext(genotype_file)[0]
@@ -39,7 +47,7 @@ def scistree(genotype_file, alpha, beta):
     tsc.settings.logfile = f"{outfile}.scistree.log"
 
     df_in = tsc.io.read(genotype_file)
-    df_out = tsc.tl.scistree(df_in, alpha=alpha, beta=beta)
+    df_out = tsc.tl.scistree(df_in, alpha=alpha, beta=beta, n_threads=n_threads)
     tsc.io.write(df_out, f"{outfile}.scistree.CFMatrix")
 
     return None
diff --git a/trisicell/external/scistree/BinaryMatrix.cpp b/trisicell/external/scistree/BinaryMatrix.cpp
index 165fcee..17d41aa 100644
--- a/trisicell/external/scistree/BinaryMatrix.cpp
+++ b/trisicell/external/scistree/BinaryMatrix.cpp
@@ -1,1235 +1,1498 @@
 #include "BinaryMatrix.h"
-#include "Utils2.h"
-#include <algorithm>
 #include <cmath>
-#include <cstdio>
 #include <cstdlib>
-#include <iostream>
+#include <cstdio>
+#include <algorithm>
 #include <sstream>
+#include <iostream>
 #include <string>
+#include "Utils2.h"
 
 // ***************************************************************************
 // Define a reusable binary matrix class
 // ***************************************************************************
 
-BinaryMatrix ::BinaryMatrix() { nCols = 0; }
+BinaryMatrix ::BinaryMatrix()
+{
+	nCols = 0;
+}
 
-BinaryMatrix ::~BinaryMatrix() {
-  // Need to free up data if needed
-  Clear();
+BinaryMatrix ::~BinaryMatrix()
+{
+	// Need to free up data if needed
+	Clear();
 }
 
-BinaryMatrix ::BinaryMatrix(int nr, int nc) { SetSize(nr, nc); }
+BinaryMatrix ::BinaryMatrix(int nr, int nc)
+{
+	SetSize(nr, nc);
+}
 
-BinaryMatrix ::BinaryMatrix(const BinaryMatrix &rhs) { Copy(rhs); }
+BinaryMatrix ::BinaryMatrix(const BinaryMatrix &rhs)
+{
+	Copy(rhs);
+}
 
-BinaryMatrix &BinaryMatrix ::operator=(const BinaryMatrix &rhs) {
-  Clear();
+BinaryMatrix &BinaryMatrix ::operator=(const BinaryMatrix &rhs)
+{
+	Clear();
 
-  Copy(rhs);
+	Copy(rhs);
 
-  return *this;
+	return *this;
 }
 
-bool BinaryMatrix ::IsDataValid(int val) {
-  if (val == 0 || val == 1) {
-    return true;
-  } else {
-    return false;
-  }
+bool BinaryMatrix ::IsDataValid(int val)
+{
+	if (val == 0 || val == 1)
+	{
+		return true;
+	}
+	else
+	{
+		return false;
+	}
 }
 
 //#if 0
-void BinaryMatrix ::TrimNgbrDupCompSites(set<int> *pRemovedSet) {
-  set<int> setOfRemovals; // contains sites to be removed
-  int cleft = 0;
-  while (cleft < nCols - 1) {
-    // Check to see if the next row  immediately is complement or not
-    if (IsColComplement(cleft, cleft + 1) == true ||
-        IsColDuplicate(cleft, cleft + 1) == true) {
-      setOfRemovals.insert(cleft + 1);
-      // cout << "Site " << cleft+1 << " is same/complement." << endl;
-    }
-    // Consider  next site
-    cleft++;
-  }
-  if (pRemovedSet != NULL) {
-    pRemovedSet->clear();
-    *pRemovedSet = setOfRemovals;
-  }
-  // Finally, remove columns
-  RemoveColumns(setOfRemovals);
+void BinaryMatrix ::TrimNgbrDupCompSites(set<int> *pRemovedSet)
+{
+	set<int> setOfRemovals; // contains sites to be removed
+	int cleft = 0;
+	while (cleft < nCols - 1)
+	{
+		// Check whether the immediately following column is a complement or a duplicate of this one
+		if (IsColComplement(cleft, cleft + 1) == true || IsColDuplicate(cleft, cleft + 1) == true)
+		{
+			setOfRemovals.insert(cleft + 1);
+			//cout << "Site " << cleft+1 << " is same/complement." << endl;
+		}
+		// Consider  next site
+		cleft++;
+	}
+	if (pRemovedSet != NULL)
+	{
+		pRemovedSet->clear();
+		*pRemovedSet = setOfRemovals;
+	}
+	// Finally, remove columns
+	RemoveColumns(setOfRemovals);
 }
 //#endif
 
 // Consolidate columns in matrix
-void BinaryMatrix::TrimDupSites(set<int> *pRemovedSites, bool fTrimSubsumbed) {
-  int c1, c2;
-  unsigned int r;
-  set<int> setOfDuplicates; // contains sites to be removed
-
-  for (c1 = 0; c1 < nCols; ++c1) {
-    for (c2 = c1 + 1; c2 < nCols; ++c2) {
-      // now we compare these two cols: c1, c2
-      // if they match, we put c2 into set
-      bool f = false;
-      for (r = 0; r < rowsArray.size(); ++r) {
-        // compare each cell
-        if (rowsArray[r][c1] != rowsArray[r][c2]) {
-          f = true;
-          break;
-        }
-      }
-
-      // Check against size
-      if (r == rowsArray.size()) {
-        // we find a duplicate
-        if (setOfDuplicates.find(c2) == setOfDuplicates.end()) {
-          //					cout <<  "Site " << c2 << " is duplicate of
-          //site
-          //"; 					cout << c1 << endl;
-        }
-        setOfDuplicates.insert(c2);
-      }
-    }
-  }
-  if (fTrimSubsumbed == true) {
-    // cout << "Now start to find subsumbed sites...\n";
-    FindSubsumedSites(setOfDuplicates);
-  }
-
-  // Now save the trimed sites info, if needed
-  if (pRemovedSites != NULL) {
-    *pRemovedSites = setOfDuplicates;
-  }
-
-  // Finally, remove columns
-  RemoveColumns(setOfDuplicates);
-}
-
-void BinaryMatrix ::TrimSubsumedRows() {
-  // Dump();
-  set<int> ssRows;
-  for (int r1 = 0; r1 < GetRowNum(); ++r1) {
-    for (int r2 = 0; r2 < GetRowNum(); ++r2) {
-      if (r1 == r2) {
-        continue;
-      }
-      if (IsRowSubsumedBy(r1, r2) == true) {
-        ssRows.insert(r1);
-      }
-    }
-  }
-  // cout << "ssRows = ";
-  // DumpIntSet( ssRows );
-  // if( ssRows.size() > 0 )
-  //{
-  //    exit(1);
-  //}
-  RemoveRows(ssRows);
-}
-
-bool BinaryMatrix ::IsRowSubsumedBy(int r1, int r2) {
-  // Test whether a row is subsumed by another row
-  bool fRes = true;
-  bool fEqual = true;
-
-  for (int c = 0; c < nCols; ++c) {
-    if (rowsArray[r1][c] != rowsArray[r2][c]) {
-      fEqual = false;
-      if (IsMissingValueBit(rowsArray[r1][c]) == false) {
-        fRes = false;
-        break;
-      }
-    }
-  }
-
-  if (fEqual == true) {
-    // do not consider two identical rows are subsumbed by another
-    return false;
-  }
-  return fRes;
-}
-
-bool BinaryMatrix ::IsColSubsumedBy(int c1, int c2) {
-  // Test whether a row is subsumed by another row
-  bool fRes = true;
-  bool fEqual = true;
-
-  for (int r = 0; r < GetRowNum(); ++r) {
-    if (rowsArray[r][c1] != rowsArray[r][c2]) {
-      fEqual = false;
-      if (IsMissingValueBit(rowsArray[r][c1]) == false) {
-        fRes = false;
-        break;
-      }
-    }
-  }
-
-  if (fEqual == true) {
-    // do not consider two identical rows are subsumbed by another
-    return false;
-  }
-  return fRes;
-}
-
-void BinaryMatrix ::FindSubsumedSites(set<int> &ssSites) {
-  // Dump();
-  for (int c1 = 0; c1 < GetColNum(); ++c1) {
-    for (int c2 = 0; c2 < GetColNum(); ++c2) {
-      if (c1 == c2) {
-        continue;
-      }
-      if (IsColSubsumedBy(c1, c2) == true) {
-        // cout << "site c1 = " << c1 << " is subsumed by c2 = " << c2 << endl;
-        ssSites.insert(c1);
-        break;
-      }
-    }
-  }
-  // cout << "ssSites = ";
-  // DumpIntSet( ssSites );
-  // if( ssSites.size() > 0 )
-  //{
-  //    exit(1);
-  //}
-}
-
-int BinaryMatrix ::FindDupRow() {
-  // This function tracking any removal of rows, but
-  // in addition to it, we track which rows remains
-  unsigned int r1, r2;
-  int c;
-
-  for (r1 = 0; r1 < rowsArray.size(); ++r1) {
-    for (r2 = r1 + 1; r2 < rowsArray.size(); ++r2) {
-      /*
-              Now test whether row 1 and row 2 are the same
-      */
-      bool fSame = true;
-      for (c = 0; c < nCols; ++c) {
-        if (rowsArray[r1][c] != rowsArray[r2][c]) {
-          fSame = false;
-          break;
-        }
-      }
-      if (fSame) {
-        // cout << "row " << r2 << " is duplicate." << endl;
-        return r2;
-      }
-    }
-  }
+void BinaryMatrix::TrimDupSites(set<int> *pRemovedSites, bool fTrimSubsumbed)
+{
+	int c1, c2;
+	unsigned int r;
+	set<int> setOfDuplicates; // contains sites to be removed
+
+	for (c1 = 0; c1 < nCols; ++c1)
+	{
+		for (c2 = c1 + 1; c2 < nCols; ++c2)
+		{
+			// now we compare these two cols: c1, c2
+			// if they match, we put c2 into set
+			bool f = false;
+			for (r = 0; r < rowsArray.size(); ++r)
+			{
+				// compare each cell
+				if (rowsArray[r][c1] != rowsArray[r][c2])
+				{
+					f = true;
+					break;
+				}
+			}
+
+			// Check against size
+			if (r == rowsArray.size())
+			{
+				// we find a duplicate
+				if (setOfDuplicates.find(c2) == setOfDuplicates.end())
+				{
+					//					cout <<  "Site " << c2 << " is duplicate of site ";
+					//					cout << c1 << endl;
+				}
+				setOfDuplicates.insert(c2);
+			}
+		}
+	}
+	if (fTrimSubsumbed == true)
+	{
+		//cout << "Now start to find subsumbed sites...\n";
+		FindSubsumedSites(setOfDuplicates);
+	}
+
+	// Now save the trimmed sites info, if needed
+	if (pRemovedSites != NULL)
+	{
+		*pRemovedSites = setOfDuplicates;
+	}
+
+	// Finally, remove columns
+	RemoveColumns(setOfDuplicates);
+}
+
+void BinaryMatrix ::TrimSubsumedRows()
+{
+	//Dump();
+	set<int> ssRows;
+	for (int r1 = 0; r1 < GetRowNum(); ++r1)
+	{
+		for (int r2 = 0; r2 < GetRowNum(); ++r2)
+		{
+			if (r1 == r2)
+			{
+				continue;
+			}
+			if (IsRowSubsumedBy(r1, r2) == true)
+			{
+				ssRows.insert(r1);
+			}
+		}
+	}
+	//cout << "ssRows = ";
+	//DumpIntSet( ssRows );
+	//if( ssRows.size() > 0 )
+	//{
+	//    exit(1);
+	//}
+	RemoveRows(ssRows);
+}
+
+bool BinaryMatrix ::IsRowSubsumedBy(int r1, int r2)
+{
+	// Test whether a row is subsumed by another row
+	bool fRes = true;
+	bool fEqual = true;
+
+	for (int c = 0; c < nCols; ++c)
+	{
+		if (rowsArray[r1][c] != rowsArray[r2][c])
+		{
+			fEqual = false;
+			if (IsMissingValueBit(rowsArray[r1][c]) == false)
+			{
+				fRes = false;
+				break;
+			}
+		}
+	}
+
+	if (fEqual == true)
+	{
+		// do not consider two identical rows as subsumed by one another
+		return false;
+	}
+	return fRes;
+}
+
+bool BinaryMatrix ::IsColSubsumedBy(int c1, int c2)
+{
+	// Test whether a column is subsumed by another column
+	bool fRes = true;
+	bool fEqual = true;
+
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		if (rowsArray[r][c1] != rowsArray[r][c2])
+		{
+			fEqual = false;
+			if (IsMissingValueBit(rowsArray[r][c1]) == false)
+			{
+				fRes = false;
+				break;
+			}
+		}
+	}
+
+	if (fEqual == true)
+	{
+		// do not consider two identical columns as subsumed by one another
+		return false;
+	}
+	return fRes;
+}
+
+void BinaryMatrix ::FindSubsumedSites(set<int> &ssSites)
+{
+	//Dump();
+	for (int c1 = 0; c1 < GetColNum(); ++c1)
+	{
+		for (int c2 = 0; c2 < GetColNum(); ++c2)
+		{
+			if (c1 == c2)
+			{
+				continue;
+			}
+			if (IsColSubsumedBy(c1, c2) == true)
+			{
+				//cout << "site c1 = " << c1 << " is subsumed by c2 = " << c2 << endl;
+				ssSites.insert(c1);
+				break;
+			}
+		}
+	}
+	//cout << "ssSites = ";
+	//DumpIntSet( ssSites );
+	//if( ssSites.size() > 0 )
+	//{
+	//    exit(1);
+	//}
+}
+
+int BinaryMatrix ::FindDupRow()
+{
+	// Return the index of the first row found that duplicates an earlier row,
+	// or -1 when no two rows are identical
+	unsigned int r1, r2;
+	int c;
+
+	for (r1 = 0; r1 < rowsArray.size(); ++r1)
+	{
+		for (r2 = r1 + 1; r2 < rowsArray.size(); ++r2)
+		{
+			/*
+				Now test whether row 1 and row 2 are the same
+			*/
+			bool fSame = true;
+			for (c = 0; c < nCols; ++c)
+			{
+				if (rowsArray[r1][c] != rowsArray[r2][c])
+				{
+					fSame = false;
+					break;
+				}
+			}
+			if (fSame)
+			{
+				//cout << "row " << r2 << " is duplicate." << endl;
+				return r2;
+			}
+		}
+	}
 
-  return -1;
+	return -1;
 }
 
-void BinaryMatrix ::FindNonInformativeSites(set<int> &sitesNoinfo) {
-  sitesNoinfo.clear();
+void BinaryMatrix ::FindNonInformativeSites(set<int> &sitesNoinfo)
+{
+	sitesNoinfo.clear();
 
-  // find set of non-informative sites
-  int c1;
-  unsigned int r;
+	// find set of non-informative sites
+	int c1;
+	unsigned int r;
 
-  for (c1 = 0; c1 < nCols; ++c1) {
-    int numZeros = 0, numOnes = 0;
-    // now we compare these two cols: c1, c2
-    // if they match, we put c2 into set
-    for (r = 0; r < rowsArray.size(); ++r) {
-      if (rowsArray[r][c1] == 0) {
-        numZeros++;
+	for (c1 = 0; c1 < nCols; ++c1)
+	{
+		int numZeros = 0, numOnes = 0;
+		// count the rows holding 0 and the rows holding 1 in column c1;
+		// any other value is not counted
+		for (r = 0; r < rowsArray.size(); ++r)
+		{
+			if (rowsArray[r][c1] == 0)
+			{
+				numZeros++;
 #if 0
 				if(numZeros >=2 && numOnes >=2)
 				{
 					break;
 				}
 #endif
-      } else if (rowsArray[r][c1] == 1) {
-        numOnes++;
+			}
+			else if (rowsArray[r][c1] == 1)
+			{
+				numOnes++;
 #if 0
 				if(numZeros >=2 &&  numOnes >= 2)
 				{
 					break;
 				}
 #endif
-      }
-    }
-    // Check to see if this is non-informative
-    if (numZeros <= 1 || numOnes <= 1) {
-      // we find a duplicate
-      //			cout << "Site  " << c1+1 << "is non-informative"
-      //<< endl;
-      sitesNoinfo.insert(c1);
-    }
-  }
-}
-
-void BinaryMatrix ::FindUniformSites(set<int> &sitesUniform) const {
-  //
-  sitesUniform.clear();
-
-  // find set of non-informative sites
-  int c1;
-  unsigned int r;
-
-  for (c1 = 0; c1 < nCols; ++c1) {
-    int numZeros = 0, numOnes = 0;
-    // now we compare these two cols: c1, c2
-    // if they match, we put c2 into set
-    for (r = 0; r < rowsArray.size(); ++r) {
-      if (rowsArray[r][c1] == 0) {
-        numZeros++;
-      } else if (rowsArray[r][c1] == 1) {
-        numOnes++;
-      }
-    }
-    // Check to see if this is non-informative
-    if (numZeros == 0 || numOnes == 0) {
-      // we find a duplicate
-      //			cout << "Site  " << c1+1 << "is non-informative"
-      //<< endl;
-      sitesUniform.insert(c1);
-    }
-  }
+			}
+		}
+		// Check to see if this is non-informative
+		if (numZeros <= 1 || numOnes <= 1)
+		{
+			// this site is non-informative
+			//			cout << "Site  " << c1+1 << "is  non-informative" << endl;
+			sitesNoinfo.insert(c1);
+		}
+	}
+}
+
+void BinaryMatrix ::FindUniformSites(set<int> &sitesUniform) const
+{
+	//
+	sitesUniform.clear();
+
+	// find set of uniform sites (columns with no 0 entries or no 1 entries)
+	int c1;
+	unsigned int r;
+
+	for (c1 = 0; c1 < nCols; ++c1)
+	{
+		int numZeros = 0, numOnes = 0;
+		// count the rows holding 0 and the rows holding 1 in column c1;
+		// any other value is not counted
+		for (r = 0; r < rowsArray.size(); ++r)
+		{
+			if (rowsArray[r][c1] == 0)
+			{
+				numZeros++;
+			}
+			else if (rowsArray[r][c1] == 1)
+			{
+				numOnes++;
+			}
+		}
+		// Check to see if this is uniform
+		if (numZeros == 0 || numOnes == 0)
+		{
+			// this site is uniform
+			//			cout << "Site  " << c1+1 << "is  non-informative" << endl;
+			sitesUniform.insert(c1);
+		}
+	}
 }
 
 /*
-        Remove all non-informative sites
-        A site is non-informative if it is all 0 (1), or has only single 0(1)
+	Remove all non-informative sites
+	A site is non-informative if it is all 0 (1), or has only single 0(1)
 */
-bool BinaryMatrix ::TrimNonInformativeSites(set<int> *pRemovedSet) {
-  set<int> setOfDuplicates;
-  FindNonInformativeSites(setOfDuplicates);
-  if (pRemovedSet != NULL) {
-    *pRemovedSet = setOfDuplicates;
-  }
-
-  // Finally, remove columns
-  bool res = false;
-  if (setOfDuplicates.size() > 0) {
-    res = true;
-    RemoveColumns(setOfDuplicates);
-  }
-  return res;
-}
-
-void BinaryMatrix ::TrimUniformSites(set<int> *pRemovedSet) {
-  set<int> setOfDuplicates;
-  FindUniformSites(setOfDuplicates);
-  if (pRemovedSet != NULL) {
-    *pRemovedSet = setOfDuplicates;
-  }
-
-  // Finally, remove columns
-  if (setOfDuplicates.size() > 0) {
-    RemoveColumns(setOfDuplicates);
-  }
-}
-
-void BinaryMatrix ::TrimFullyCompatibleSites(set<int> *pRemovedSet) {
-  int c1, c2;
-  set<int> setOfDuplicates; // contains sites to be removed
-  for (c1 = 0; c1 < nCols; ++c1) {
-    // now we compare these two cols: c1, c2
-    // if they match, we put c2 into set
-    bool f = true; // by default, we say f is fully-compatible
-    // Now we test whether sites c1 is compatible with c2
-    for (c2 = 0; c2 < nCols; ++c2) {
-      if (IsCompatible(c1, c2) == false) {
-        f = false;
-        break;
-      }
-    }
-    if (f == true && IsColumnBinary(c1) == true) {
-      // cout << "Site " << c1+1 << " is fully compatible" << endl;
-      setOfDuplicates.insert(c1);
-    }
-  }
+bool BinaryMatrix ::TrimNonInformativeSites(set<int> *pRemovedSet)
+{
+	set<int> setOfDuplicates;
+	FindNonInformativeSites(setOfDuplicates);
+	if (pRemovedSet != NULL)
+	{
+		*pRemovedSet = setOfDuplicates;
+	}
+
+	// Finally, remove columns
+	bool res = false;
+	if (setOfDuplicates.size() > 0)
+	{
+		res = true;
+		RemoveColumns(setOfDuplicates);
+	}
+	return res;
+}
 
-  // Now remember the set if needed
-  if (pRemovedSet != NULL) {
-    pRemovedSet->clear();
-    *pRemovedSet = setOfDuplicates;
-  }
+void BinaryMatrix ::TrimUniformSites(set<int> *pRemovedSet)
+{
+	set<int> setOfDuplicates;
+	FindUniformSites(setOfDuplicates);
+	if (pRemovedSet != NULL)
+	{
+		*pRemovedSet = setOfDuplicates;
+	}
 
-  // Finally, remove columns
-  RemoveColumns(setOfDuplicates);
+	// Finally, remove columns
+	if (setOfDuplicates.size() > 0)
+	{
+		RemoveColumns(setOfDuplicates);
+	}
 }
 
-bool BinaryMatrix ::IsAllColumnsUnique() {
-  bool res = true;
+void BinaryMatrix ::TrimFullyCompatibleSites(set<int> *pRemovedSet)
+{
+	int c1, c2;
+	set<int> setOfDuplicates; // contains sites to be removed
+	for (c1 = 0; c1 < nCols; ++c1)
+	{
+		// now we check column c1 against every column c2;
+		// if it is compatible with all of them (and is binary), we put c1 into set
+		bool f = true; // by default, we say f is fully-compatible
+		// Now we test whether sites c1 is compatible with c2
+		for (c2 = 0; c2 < nCols; ++c2)
+		{
+			if (IsCompatible(c1, c2) == false)
+			{
+				f = false;
+				break;
+			}
+		}
+		if (f == true && IsColumnBinary(c1) == true)
+		{
+			//cout << "Site " << c1+1 << " is fully compatible" << endl;
+			setOfDuplicates.insert(c1);
+		}
+	}
 
-  for (int i = 0; i < nCols - 1; ++i) {
-    for (int j = i + 1; j < nCols; ++j) {
-      // check to see if column i, j are duplicate
-      if (CmpColumns(i, j) == true) {
-        return false;
-      }
-    }
-  }
+	// Now remember the set if needed
+	if (pRemovedSet != NULL)
+	{
+		pRemovedSet->clear();
+		*pRemovedSet = setOfDuplicates;
+	}
 
-  return res;
+	// Finally, remove columns
+	RemoveColumns(setOfDuplicates);
 }
 
-bool BinaryMatrix ::IsColNonInformative(int c) {
-  int numZeros = 0, numOnes = 0, numMissing = 0;
-  // now we compare these two cols: c1, c2
-  // if they match, we put c2 into set
-  for (unsigned int r = 0; r < rowsArray.size(); ++r) {
-    if (rowsArray[r][c] == 0) {
-      numZeros++;
+bool BinaryMatrix ::IsAllColumnsUnique()
+{
+	bool res = true;
+
+	for (int i = 0; i < nCols - 1; ++i)
+	{
+		for (int j = i + 1; j < nCols; ++j)
+		{
+			// check to see if column i, j are duplicate
+			if (CmpColumns(i, j) == true)
+			{
+				return false;
+			}
+		}
+	}
+
+	return res;
+}
+
+bool BinaryMatrix ::IsColNonInformative(int c)
+{
+	int numZeros = 0, numOnes = 0, numMissing = 0;
+	// count the rows holding 0, the rows holding 1 and the rows for which
+	// IsMissingValueBit is true in column c
+	for (unsigned int r = 0; r < rowsArray.size(); ++r)
+	{
+		if (rowsArray[r][c] == 0)
+		{
+			numZeros++;
 #if 0
 			if(numZeros >=2 && numOnes >=2)
 			{
 				break;
 			}
 #endif
-    } else if (rowsArray[r][c] == 1) {
-      numOnes++;
+		}
+		else if (rowsArray[r][c] == 1)
+		{
+			numOnes++;
 #if 0
 			if(numZeros >=2 &&  numOnes >= 2)
 			{
 				break;
 			}
 #endif
-    } else if (IsMissingValueBit(rowsArray[r][c]) == true) {
-      numMissing++;
-    }
-  }
-  // Check to see if this is non-informative
-  if ((numZeros == 1 || numOnes == 1) && numMissing == 0) {
-    // we find a duplicate
-    //			cout << "Site  " << c1+1 << "is  non-informative" <<
-    // endl;
-    return true;
-  } else {
-    return false;
-  }
-}
-
-bool BinaryMatrix ::IsColNonInformative(int c, int *singletonState) {
-  int numZeros = 0, numOnes = 0;
-  // now we compare these two cols: c1, c2
-  // if they match, we put c2 into set
-  for (unsigned int r = 0; r < rowsArray.size(); ++r) {
-    if (rowsArray[r][c] == 0) {
-      numZeros++;
+		}
+		else if (IsMissingValueBit(rowsArray[r][c]) == true)
+		{
+			numMissing++;
+		}
+	}
+	// Check to see if this is non-informative
+	if ((numZeros == 1 || numOnes == 1) && numMissing == 0)
+	{
+		// this column has a single 0 or a single 1 and no missing values
+		//			cout << "Site  " << c1+1 << "is  non-informative" << endl;
+		return true;
+	}
+	else
+	{
+		return false;
+	}
+}
+
+bool BinaryMatrix ::IsColNonInformative(int c, int *singletonState)
+{
+	int numZeros = 0, numOnes = 0;
+	// count the rows holding 0 and the rows holding 1 in column c;
+	// any other value is not counted
+	for (unsigned int r = 0; r < rowsArray.size(); ++r)
+	{
+		if (rowsArray[r][c] == 0)
+		{
+			numZeros++;
 #if 0
 			if(numZeros >=2 && numOnes >=2)
 			{
 				break;
 			}
 #endif
-    } else if (rowsArray[r][c] == 1) {
-      numOnes++;
+		}
+		else if (rowsArray[r][c] == 1)
+		{
+			numOnes++;
 #if 0
 			if(numZeros >=2 &&  numOnes >= 2)
 			{
 				break;
 			}
 #endif
-    }
-  }
-  // Check to see if this is non-informative
-  if (numZeros == 1 || numOnes == 1) {
-    if (singletonState != NULL) {
-      if (numZeros == 1) {
-        *singletonState = 0;
-      } else {
-        *singletonState = 1;
-      }
-    }
-    // we find a duplicate
-    //			cout << "Site  " << c1+1 << "is  non-informative" <<
-    // endl;
-    return true;
-  } else {
-    return false;
-  }
-}
-
-bool BinaryMatrix ::IsColTrivial(int c) {
-  // check whether column c is trivial or not
-  // a column is trivial if the column is all 0 or all 1
-  bool hasZero = false;
-  bool hasOne = false;
-  for (int i = 0; i < GetRowNum(); ++i) {
-    if (rowsArray[i][c] == 0) {
-      hasZero = true;
-    } else {
-      hasOne = true;
-    }
-  }
-  if (hasZero && hasOne) {
-    return false;
-  } else {
-    return true;
-  }
-}
-
-void BinaryMatrix ::GetTrivialSites(vector<int> &trivSites) {
-  trivSites.clear();
-  for (int c = 0; c < GetColNum(); ++c) {
-    if (IsColTrivial(c) == true) {
-      trivSites.push_back(c);
-    }
-  }
+		}
+	}
+	// Check to see if this is non-informative
+	if (numZeros == 1 || numOnes == 1)
+	{
+		if (singletonState != NULL)
+		{
+			if (numZeros == 1)
+			{
+				*singletonState = 0;
+			}
+			else
+			{
+				*singletonState = 1;
+			}
+		}
+		// this column has a single 0 or a single 1
+		//			cout << "Site  " << c1+1 << "is  non-informative" << endl;
+		return true;
+	}
+	else
+	{
+		return false;
+	}
 }
 
-bool BinaryMatrix ::IsSequencesMatch(int r1, int r2, vector<int> &seqColPos) {
-  bool res = true;
-  // cout << "r1 = " << r1 << ", r2 = " << r2 << ", seqeucne lpocation are ";
-  // DumpIntVec( seqColPos );
+bool BinaryMatrix ::IsColTrivial(int c)
+{
+	// check whether column c is trivial or not
+	// a column is trivial if the column is all 0 or all 1
+	bool hasZero = false;
+	bool hasOne = false;
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		if (rowsArray[i][c] == 0)
+		{
+			hasZero = true;
+		}
+		else
+		{
+			hasOne = true;
+		}
+	}
+	if (hasZero && hasOne)
+	{
+		return false;
+	}
+	else
+	{
+		return true;
+	}
+}
 
-  // This function test whether (non-continuous) sequences for two rows match or
-  // not
-  for (unsigned int i = 0; i < seqColPos.size(); ++i) {
-    if (rowsArray[r1][seqColPos[i]] != rowsArray[r2][seqColPos[i]]) {
-      res = false;
-      break;
-    }
-  }
-  return res;
+void BinaryMatrix ::GetTrivialSites(vector<int> &trivSites)
+{
+	trivSites.clear();
+	for (int c = 0; c < GetColNum(); ++c)
+	{
+		if (IsColTrivial(c) == true)
+		{
+			trivSites.push_back(c);
+		}
+	}
 }
 
-void BinaryMatrix ::GetSequencesDiffSites(int r1, int r2,
-                                          set<int> &seqColDiffs) const {
-  // colect the set of sites that the two rows are different
-  seqColDiffs.clear();
-  for (int c = 0; c < GetColNum(); ++c) {
-    if (rowsArray[r1][c] != rowsArray[r2][c]) {
-      seqColDiffs.insert(c);
-    }
-  }
+bool BinaryMatrix ::IsSequencesMatch(int r1, int r2, vector<int> &seqColPos)
+{
+	bool res = true;
+	//cout << "r1 = " << r1 << ", r2 = " << r2 << ", seqeucne lpocation are ";
+	//DumpIntVec( seqColPos );
+
+	// This function tests whether the (non-contiguous) sequences of two rows match or not
+	for (unsigned int i = 0; i < seqColPos.size(); ++i)
+	{
+		if (rowsArray[r1][seqColPos[i]] != rowsArray[r2][seqColPos[i]])
+		{
+			res = false;
+			break;
+		}
+	}
+	return res;
 }
 
-bool BinaryMatrix ::IsZeroColumn(int c) {
-  bool res = true;
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    if (rowsArray[i][c] == 1) {
-      res = false;
-      break;
-    }
-  }
-  return res;
+void BinaryMatrix ::GetSequencesDiffSites(int r1, int r2, set<int> &seqColDiffs) const
+{
+	// collect the set of sites at which the two rows differ
+	seqColDiffs.clear();
+	for (int c = 0; c < GetColNum(); ++c)
+	{
+		if (rowsArray[r1][c] != rowsArray[r2][c])
+		{
+			seqColDiffs.insert(c);
+		}
+	}
 }
 
-int BinaryMatrix ::GetZeroColNum() {
-  int res = 0;
-  for (int i = 0; i < nCols; ++i) {
-    if (IsZeroColumn(i)) {
-      res++;
-    }
-  }
-  return res;
-}
-
-void BinaryMatrix ::BuildColEquivClasses() {
-  for (int i = 0; i < nCols; ++i) {
-    bool f = false;
-    for (COLUMN_EQUIV_CLASS::iterator it = setColEquiv.begin();
-         it != setColEquiv.end(); ++it) {
-      set<int> &s = *it;
-
-      // check to see if column i/j are the same
-      if (CmpColumns(i, *(s.begin())) == true) {
-        // remember this fact in the map
-        f = true;
-        s.insert(i);
-        break;
-      }
-    }
+bool BinaryMatrix ::IsZeroColumn(int c)
+{
+	bool res = true;
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		if (rowsArray[i][c] == 1)
+		{
+			res = false;
+			break;
+		}
+	}
+	return res;
+}
 
-    if (f == false) {
-      // Create a new set
-      set<int> s1;
-      s1.insert(i);
-      setColEquiv.push_back(s1);
-    }
-  }
-}
-
-void BinaryMatrix ::GetUniqueColsInRange(int c1, int c2, set<int> &setUniques) {
-  // make sure equiv class is pre-processed
-  if (setColEquiv.empty()) {
-    BuildColEquivClasses();
-  }
-
-  // exam each column equivlance classes
-  // Put the mostly diesired (for now, it is the one near the center)
-  // into result set (which must be in range)
-  int center = (c1 + c2) / 2;
-  for (unsigned int i = 0; i < setColEquiv.size(); ++i) {
-    set<int> &s = setColEquiv[i];
-    int cand = -100;
-    for (set<int>::iterator it = s.begin(); it != s.end(); ++it) {
-      int c = *it;
-      if (c >= c1 && c <= c2 && abs(cand - center) > abs(c - center)) {
-        cand = c;
-      }
-    }
-    if (cand >= 0) {
-      setUniques.insert(cand);
-    }
-  }
+int BinaryMatrix ::GetZeroColNum()
+{
+	int res = 0;
+	for (int i = 0; i < nCols; ++i)
+	{
+		if (IsZeroColumn(i))
+		{
+			res++;
+		}
+	}
+	return res;
 }
 
-bool BinaryMatrix ::IsPerfectPhylogeny() {
-  for (int i = 0; i < nCols - 1; ++i) {
-    for (int j = i + 1; j < nCols; ++j) {
-      if (IsCompatible(i, j) == false) {
-        // cout << "Site i=" << i << ", j=" << j << " are incompatible.\n";
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-void BinaryMatrix ::ConstructConflictGraph(UnWeightedGraph &graph) {
-  // Conflict graph vertex num = # of columns
-  // Edge is whether col i conflict with col j
-  LIST_VERTEX vertList;
-  LIST_EDGE edgeList;
-
-  for (int i = 0; i < nCols; ++i) {
-    char buf[100];
-    buf[0] = 'c';
-    sprintf(&buf[1], "%d", i + 1);
-    BGVertex v(buf);
-    vertList.push_back(v);
-  }
-  graph.SetVertices(vertList);
-
-  // Now check for all pair of columns for conflict
-  for (int i = 0; i < nCols - 1; ++i) {
-    for (int j = i + 1; j < nCols; ++j) {
-      if (IsCompatible(i, j) == false) {
-        // cout << "Add one edge (" << i << " , " << j << ")" << endl;
-        BGEdge eg("e", i, j, graph.GetListVerts());
-        edgeList.push_back(eg);
-      }
-    }
-  }
+void BinaryMatrix ::BuildColEquivClasses()
+{
+	for (int i = 0; i < nCols; ++i)
+	{
+		bool f = false;
+		for (COLUMN_EQUIV_CLASS::iterator it = setColEquiv.begin(); it != setColEquiv.end(); ++it)
+		{
+			set<int> &s = *it;
 
-  // Finally, setup the vertex\edge lists
-  graph.SetEdges(edgeList);
+			// check to see if column i/j are the same
+			if (CmpColumns(i, *(s.begin())) == true)
+			{
+				// remember this fact in the map
+				f = true;
+				s.insert(i);
+				break;
+			}
+		}
+
+		if (f == false)
+		{
+			// Create a new set
+			set<int> s1;
+			s1.insert(i);
+			setColEquiv.push_back(s1);
+		}
+	}
 }
 
-bool BinaryMatrix ::IsColumnBinary(int c) const {
-  for (int i = 0; i < GetRowNum(); ++i) {
-    if (rowsArray[i][c] != 0 && rowsArray[i][c] != 1) {
-      return false;
-    }
-  }
-  return true;
+void BinaryMatrix ::GetUniqueColsInRange(int c1, int c2, set<int> &setUniques)
+{
+	// make sure equiv class is pre-processed
+	if (setColEquiv.empty())
+	{
+		BuildColEquivClasses();
+	}
+
+	// examine each column equivalence class
+	// Put the most desired column (for now, the one nearest the center)
+	// into the result set (it must be in range)
+	int center = (c1 + c2) / 2;
+	for (unsigned int i = 0; i < setColEquiv.size(); ++i)
+	{
+		set<int> &s = setColEquiv[i];
+		int cand = -100;
+		for (set<int>::iterator it = s.begin(); it != s.end(); ++it)
+		{
+			int c = *it;
+			if (c >= c1 && c <= c2 && abs(cand - center) > abs(c - center))
+			{
+				cand = c;
+			}
+		}
+		if (cand >= 0)
+		{
+			setUniques.insert(cand);
+		}
+	}
 }
 
-bool BinaryMatrix ::IsRowBinary(int r) const {
-  for (int i = 0; i < nCols; ++i) {
-    if (rowsArray[r][i] != 0 && rowsArray[r][i] != 1) {
-      return false;
-    }
-  }
-  return true;
+bool BinaryMatrix ::IsPerfectPhylogeny()
+{
+	for (int i = 0; i < nCols - 1; ++i)
+	{
+		for (int j = i + 1; j < nCols; ++j)
+		{
+			if (IsCompatible(i, j) == false)
+			{
+				//cout << "Site i=" << i << ", j=" << j << " are incompatible.\n";
+				return false;
+			}
+		}
+	}
+	return true;
 }
 
-void BinaryMatrix ::TrimNonBinaryRows() {
-  set<int> setOfDuplicates;
-  setOfDuplicates.clear();
-  unsigned int r1;
-  // int c;
+void BinaryMatrix ::ConstructConflictGraph(UnWeightedGraph &graph)
+{
+	// Conflict graph vertex num = # of columns
+	// Edge is whether col i conflict with col j
+	LIST_VERTEX vertList;
+	LIST_EDGE edgeList;
 
-  for (r1 = 0; r1 < rowsArray.size(); ++r1) {
-    if (IsRowBinary(r1) == false) {
-      // The row with duplicated rows are treated the same
-      // cout << "row " << r2 << " is not binary." << endl;
-      setOfDuplicates.insert(r1);
-    }
-  }
-  /*
-          Now we remove all duplicate rows
-  */
-  RemoveRows(setOfDuplicates);
+	for (int i = 0; i < nCols; ++i)
+	{
+		char buf[100];
+		buf[0] = 'c';
+		sprintf(&buf[1], "%d", i + 1);
+		BGVertex v(buf);
+		vertList.push_back(v);
+	}
+	graph.SetVertices(vertList);
+
+	// Now check for all pair of columns for conflict
+	for (int i = 0; i < nCols - 1; ++i)
+	{
+		for (int j = i + 1; j < nCols; ++j)
+		{
+			if (IsCompatible(i, j) == false)
+			{
+				//cout << "Add one edge (" << i << " , " << j << ")" << endl;
+				BGEdge eg("e", i, j, graph.GetListVerts());
+				edgeList.push_back(eg);
+			}
+		}
+	}
+
+	// Finally, setup the vertex\edge lists
+	graph.SetEdges(edgeList);
 }
 
-bool BinaryMatrix ::IsRowRangeBinary(int r, int left, int right) {
-  for (int i = left; i <= right; ++i) {
-    if (rowsArray[r][i] == 2) {
-      return false;
-    }
-  }
+bool BinaryMatrix ::IsColumnBinary(int c) const
+{
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		if (rowsArray[i][c] != 0 && rowsArray[i][c] != 1)
+		{
+			return false;
+		}
+	}
+	return true;
+}
+
+bool BinaryMatrix ::IsRowBinary(int r) const
+{
+	for (int i = 0; i < nCols; ++i)
+	{
+		if (rowsArray[r][i] != 0 && rowsArray[r][i] != 1)
+		{
+			return false;
+		}
+	}
+	return true;
+}
+
+void BinaryMatrix ::TrimNonBinaryRows()
+{
+	set<int> setOfDuplicates;
+	setOfDuplicates.clear();
+	unsigned int r1;
+	//int c;
+
+	for (r1 = 0; r1 < rowsArray.size(); ++r1)
+	{
+		if (IsRowBinary(r1) == false)
+		{
+			// This row is not binary: record it so that it is removed below
+			//cout << "row " << r2 << " is not binary." << endl;
+			setOfDuplicates.insert(r1);
+		}
+	}
+	/*
+		Now we remove all the non-binary rows collected above
+	*/
+	RemoveRows(setOfDuplicates);
+}
+
+bool BinaryMatrix ::IsRowRangeBinary(int r, int left, int right)
+{
+	for (int i = left; i <= right; ++i)
+	{
+		if (rowsArray[r][i] == 2)
+		{
+			return false;
+		}
+	}
 
-  return true;
+	return true;
 }
 
-////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////
 //		Inernal utility functions
-////////////////////////////////////////////////////////////////////////////////
-
-bool BinaryMatrix ::IsCompatible(int c1, int c2) {
-  bool f00 = false;
-  bool f01 = false;
-  bool f10 = false;
-  bool f11 = false;
-
-  // if c1==c2, we assume it is compatible
-  if (c1 == c2) {
-    return true;
-  }
-#if 0 // no, acutally, we need to be more cautious, unless we see evidence, we
-      // put it
-      // For now, if a column is not binary, we consider it is not compatible
+///////////////////////////////////////////////////////////////////////////////////////////
+
+bool BinaryMatrix ::IsCompatible(int c1, int c2)
+{
+	bool f00 = false;
+	bool f01 = false;
+	bool f10 = false;
+	bool f11 = false;
+
+	// if c1==c2, we assume it is compatible
+	if (c1 == c2)
+	{
+		return true;
+	}
+#if 0 // no, actually, we need to be more cautious, unless we see evidence, we put it
+	// For now, if a column is not binary, we consider it is not compatible
 	if( IsColumnBinary(c1) == false || IsColumnBinary(c2) == false)
 	{
 		return false;
 	}
 #endif
-  // 4-gamet test
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    if (rowsArray[i][c1] == 0 && rowsArray[i][c2] == 0) {
-      f00 = true;
-    }
-    if (rowsArray[i][c1] == 0 && rowsArray[i][c2] == 1) {
-      f01 = true;
-    }
-    if (rowsArray[i][c1] == 1 && rowsArray[i][c2] == 0) {
-      f10 = true;
-    }
-    if (rowsArray[i][c1] == 1 && rowsArray[i][c2] == 1) {
-      f11 = true;
-    }
-  }
-
-  // Now check to see if all flags are set
-  if (f00 && f01 && f10 && f11)
-    return false;
-  else
-    return true;
-}
-
-bool BinaryMatrix ::IsCompatibleRooted(int c1, int c2, int rallele1,
-                                       int rallele2) {
-  bool f00 = false;
-  bool f01 = false;
-  bool f10 = false;
-  bool f11 = false;
-
-  // if c1==c2, we assume it is compatible
-  if (c1 == c2) {
-    return true;
-  }
-
-  // 3-gamet test
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    if (rowsArray[i][c1] == rallele1 && rowsArray[i][c2] == rallele2) {
-      f00 = true;
-    }
-    if (rowsArray[i][c1] == rallele1 && rowsArray[i][c2] != rallele2) {
-      f01 = true;
-    }
-    if (rowsArray[i][c1] != rallele1 && rowsArray[i][c2] == rallele2) {
-      f10 = true;
-    }
-    if (rowsArray[i][c1] != rallele1 && rowsArray[i][c2] != rallele2) {
-      f11 = true;
-    }
-  }
+	// 4-gamete test
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		if (rowsArray[i][c1] == 0 && rowsArray[i][c2] == 0)
+		{
+			f00 = true;
+		}
+		if (rowsArray[i][c1] == 0 && rowsArray[i][c2] == 1)
+		{
+			f01 = true;
+		}
+		if (rowsArray[i][c1] == 1 && rowsArray[i][c2] == 0)
+		{
+			f10 = true;
+		}
+		if (rowsArray[i][c1] == 1 && rowsArray[i][c2] == 1)
+		{
+			f11 = true;
+		}
+	}
 
-  // Now check to see if all flags are set
-  if (f01 && f10 && f11)
-    return false;
-  else
-    return true;
+	// Now check to see if all flags are set
+	if (f00 && f01 && f10 && f11)
+		return false;
+	else
+		return true;
 }
 
-bool BinaryMatrix ::IsSiteCompatibleWithRegion(int s, int rc1, int rc2) {
-  bool res = true;
-  for (int rci = rc1; rci <= rc2; ++rci) {
-    if (IsCompatible(s, rci) == false) {
-      res = false;
-      break;
-    }
-  }
-  return res;
+bool BinaryMatrix ::IsCompatibleRooted(int c1, int c2, int rallele1, int rallele2)
+{
+	bool f00 = false;
+	bool f01 = false;
+	bool f10 = false;
+	bool f11 = false;
+
+	// if c1==c2, we assume it is compatible
+	if (c1 == c2)
+	{
+		return true;
+	}
+
+	// 3-gamete test
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		if (rowsArray[i][c1] == rallele1 && rowsArray[i][c2] == rallele2)
+		{
+			f00 = true;
+		}
+		if (rowsArray[i][c1] == rallele1 && rowsArray[i][c2] != rallele2)
+		{
+			f01 = true;
+		}
+		if (rowsArray[i][c1] != rallele1 && rowsArray[i][c2] == rallele2)
+		{
+			f10 = true;
+		}
+		if (rowsArray[i][c1] != rallele1 && rowsArray[i][c2] != rallele2)
+		{
+			f11 = true;
+		}
+	}
+
+	// Now check to see if all flags are set
+	if (f01 && f10 && f11)
+		return false;
+	else
+		return true;
 }
 
-bool BinaryMatrix ::IsRegionFullyCompatible(int rc1, int rc2) {
-  for (int rci = rc1; rci <= rc2; ++rci) {
-    for (int rcj = rci + 1; rcj <= rc2; ++rcj) {
-      if (IsCompatible(rci, rcj) == false) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-void BinaryMatrix ::GetGamates(int c1, int c2, bool &f00, bool &f01, bool &f10,
-                               bool &f11) {
-  // init to all false upon start
-  f00 = false;
-  f01 = false;
-  f10 = false;
-  f11 = false;
-
-  // if c1==c2, we assume it is compatible
-  if (c1 == c2) {
-    return;
-  }
-
-  // 4-gamet test
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    if (rowsArray[i][c1] == 0 && rowsArray[i][c2] == 0) {
-      f00 = true;
-    }
-    if (rowsArray[i][c1] == 0 && rowsArray[i][c2] == 1) {
-      f01 = true;
-    }
-    if (rowsArray[i][c1] == 1 && rowsArray[i][c2] == 0) {
-      f10 = true;
-    }
-    if (rowsArray[i][c1] == 1 && rowsArray[i][c2] == 1) {
-      f11 = true;
-    }
-  }
+bool BinaryMatrix ::IsSiteCompatibleWithRegion(int s, int rc1, int rc2)
+{
+	bool res = true;
+	for (int rci = rc1; rci <= rc2; ++rci)
+	{
+		if (IsCompatible(s, rci) == false)
+		{
+			res = false;
+			break;
+		}
+	}
+	return res;
 }
 
-bool BinaryMatrix ::IsColComplement(int c1, int c2) {
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    // cout << "[i, c1] = " << rowsArray[i][c1] << ", rowsArray[i][c2] = " <<
-    // rowsArray[i][c2] << endl;
-    if (rowsArray[i][c1] == rowsArray[i][c2]) {
-      return false;
-    }
-  }
-  // cout << "col " << c1 << ", " << c2 << " are compl.\n";
-  return true;
-}
-bool BinaryMatrix ::IsColDuplicate(int c1, int c2) {
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    if (rowsArray[i][c1] != rowsArray[i][c2]) {
-      return false;
-    }
-  }
-  // cout << "col " << c1 << ", " << c2 << " are identical.\n";
-  return true;
-}
-
-void BinaryMatrix ::GetAllIncompatiblePairs(
-    set<pair<int, int> > &incompatibles) {
-  incompatibles.clear();
-  for (int i = 0; i < nCols; i++) {
-    for (int j = i + 1; j < nCols; ++j) {
-      // Test to see if site i, j are compatible
-      if (IsCompatible(i, j) == false) {
-        pair<int, int> p(i, j);
-        incompatibles.insert(p);
-      }
-    }
-  }
-}
-
-int BinaryMatrix ::ComputeHKBound() {
-  // The idea is to test for incompatible between each column
-  // Then create an incompatibility map, and compute the bound
-  map<INTERVAL, int> bounds;
-
-  int nCols = GetColNum();
-  int nRows = GetRowNum();
-  if (nCols <= 1 || nRows <= 3) {
-    return 0;
-  }
-
-  for (int i = 0; i < nCols - 1; ++i) {
-    for (int j = i + 1; j < nCols; ++j) {
-      // Check if site i, j conflict
-      int val = 0;
-      if (IsCompatible(i, j) == false) {
-        val = 1;
-      }
-      INTERVAL iv(i, j);
-      bounds.insert(map<INTERVAL, int>::value_type(iv, val));
-    }
-  }
-  vector<int> locBreakpoints; // do not really need this, but...
-  return CalcCompositeBound(bounds, 0, nCols - 1, locBreakpoints);
-}
-
-int BinaryMatrix ::ComputeFastHapBound() {
-  // Simply test for each submatrix for a rough haplotype bound
-  // Then create an incompatibility map, and compute the bound
-  // To speed things up, we do not perform optimal RecMin
-  // Rather simply no-subset
-
-  map<INTERVAL, int> bounds;
-
-  int nc = GetColNum();
-  int nr = GetRowNum();
-  if (nc <= 1 || nr <= 3) {
-    return 0;
-  }
-
-  for (int i = 0; i < nc - 1; ++i) {
-    for (int j = i + 1; j < nc; ++j) {
-      // Check if site i, j conflict
-      int val = 0;
-
-      BinaryMatrix submat;
-      SubMatrix(0, GetRowNum() - 1, i, j, submat);
-      submat.TrimFullyCompatibleSites();
-      submat.TrimDupRows();
-
-      val = submat.GetRowNum() - submat.GetColNum() - 1;
-      if (val < 0) {
-        val = 0;
-      }
-
-      INTERVAL iv(i, j);
-      bounds.insert(map<INTERVAL, int>::value_type(iv, val));
-      // cout << "interval " << i << ", " << j  << " quick bd = " << val <<
-      // endl;
-    }
-  }
-  vector<int> locBreakpoints; // do not really need this, but...
-  return CalcCompositeBound(bounds, 0, nc - 1, locBreakpoints);
-}
-
-// This function computes a fast recombination upper bound, which can be useful
-// in applications like branch and bound The idea is to remove a sequence from
-// inputmat a time, and take the min to recombine them
-int BinaryMatrix ::ComputeFastRecombUpperBound() {
-  // Create a new sequence for operation
-  BinaryMatrix matToOp = *this;
-
-  int res = 0;
-  // Whenver the matrix is too small, we stop
-  while (true) {
-    // First perform cleanup: drop non-informatives rows, collapse identical
-    // rows
-    set<int> setOfRemoved;
-    matToOp.TrimFullyCompatibleSites(&setOfRemoved);
-    matToOp.FindNgbrDupCompSites(&setOfRemoved);
-    matToOp.RemoveColumns(setOfRemoved);
-    matToOp.TrimDupRows();
-
-    if (matToOp.GetRowNum() <= 3) {
-      break;
-    }
+bool BinaryMatrix ::IsRegionFullyCompatible(int rc1, int rc2)
+{
+	for (int rci = rc1; rci <= rc2; ++rci)
+	{
+		for (int rcj = rci + 1; rcj <= rc2; ++rcj)
+		{
+			if (IsCompatible(rci, rcj) == false)
+			{
+				return false;
+			}
+		}
+	}
+	return true;
+}
 
-    // Find the smallest cost row
-    int minRmCost = HAP_MAX_INT;
-    int minRow = -1;
-    // Try every leftover row in matToOp
-    for (int r = 0; r < matToOp.GetRowNum(); ++r) {
-      //            SEQUENCE row;
-      //            matToOp.GetRow( r, row );
-      int recCost = matToOp.ComputeMinRecombWeight(r);
-      if (recCost < minRmCost) {
-        minRmCost = recCost;
-        minRow = r;
-      }
-    }
-    YW_ASSERT_INFO(minRow >= 0, "Error: minRow must be updated at least once.");
-    // cout << "minRmCost = " << minRmCost << ", minRow = " << minRow << endl;
-    // Now we remove this sequence
-    res += minRmCost;
-    set<int> seqsToRemove;
-    seqsToRemove.insert(minRow);
-    matToOp.RemoveRows(seqsToRemove);
-  }
-  // cout << "A fast recomb. upper bound = " << res << endl;
-  return res;
-}
-
-int BinaryMatrix ::ComputeMinRecombWeight(int rowIndex) {
-  // This function computes a recombination number given the rows in matrix
-  // that are ancesters of rowIndex
-  // This function computes the minimum recombination weight for the given
-  // hapRow when restricted to interval [left, right] in mat
-  int res = 0;
-  // cout << "ComputeMinRecombWeight :: rowIndex = " << rowIndex << endl;
-  // cout <<"matrix here is: ";
-  // Dump();
-  set<int> lastTrackRows; // set of rows that matching the hapRow
-
-  // Initially every row is a match
-  for (int i = 0; i < GetRowNum(); ++i) {
-    if (i != rowIndex) {
-      lastTrackRows.insert(i);
-    }
-  }
-
-  for (int curpos = 0; curpos < GetColNum(); ++curpos) {
-    // Each time, we intersect the set with the sets matching the current bit
-    set<int> trackRows;
-    for (int i = 0; i < GetRowNum(); ++i) {
-      if (i == rowIndex) {
-        continue;
-      }
-
-      if (GetValAt(i, curpos) == GetValAt(rowIndex, curpos)) {
-        // Yes, this row matches
-        trackRows.insert(i);
-      }
-    }
-    YW_ASSERT_INFO(trackRows.size() > 0, "trackRows must contain some rows.");
-
-    // Now we test if there is intersection, if non-empty, we contiinue
-    set<int> sint;
-    JoinSets(trackRows, lastTrackRows, sint);
-    if (sint.size() == 0) {
-      // No intersection, so we have to increase the result (we know there must
-      // be one recomb here, from the right-maximal proof)
-      ++res;
-
-      // Re-initialize lastTrackRows here
-      lastTrackRows = trackRows;
-      //            PopulateSetWithInterval( lastTrackRows, 0, mat.size() - 1 );
-    } else {
-      // In this case, we still continue
-      lastTrackRows = sint;
-    }
-  }
+void BinaryMatrix ::GetGamates(int c1, int c2, bool &f00, bool &f01, bool &f10, bool &f11)
+{
+	// init to all false upon start
+	f00 = false;
+	f01 = false;
+	f10 = false;
+	f11 = false;
 
-  // cout << "Min recomb = " << res << endl;
-  return res;
+	// if c1==c2, we assume it is compatible
+	if (c1 == c2)
+	{
+		return;
+	}
+
+	// 4-gamete test
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		if (rowsArray[i][c1] == 0 && rowsArray[i][c2] == 0)
+		{
+			f00 = true;
+		}
+		if (rowsArray[i][c1] == 0 && rowsArray[i][c2] == 1)
+		{
+			f01 = true;
+		}
+		if (rowsArray[i][c1] == 1 && rowsArray[i][c2] == 0)
+		{
+			f10 = true;
+		}
+		if (rowsArray[i][c1] == 1 && rowsArray[i][c2] == 1)
+		{
+			f11 = true;
+		}
+	}
 }
 
-int BinaryMatrix ::GetMajorityState(int site) {
-  int res = 0;
-  for (int r = 0; r < GetRowNum(); ++r) {
-    if (GetValAt(r, site) == 0) {
-      res++;
-    }
-  }
-  if (res >= (GetRowNum() + 1) / 2) {
-    return 0;
-  } else {
-    return 1;
-  }
-}
-
-int BinaryMatrix ::GetMinorStateNum(int site, int &minorState) const {
-  int res = 0;
-  for (int r = 0; r < GetRowNum(); ++r) {
-    if (GetValAt(r, site) == 0) {
-      res++;
-    }
-  }
-  if (res >= (GetRowNum() + 1) / 2) {
-    minorState = 1;
-    return GetRowNum() - res;
-  } else {
-    minorState = 0;
-    return res;
-  }
-}
-
-void BinaryMatrix ::GetMinorStateRows(int site, int &minorState,
-                                      set<int> &listRowsWMinor) const {
-  GetMinorStateNum(site, minorState);
-  for (int r = 0; r < GetRowNum(); ++r) {
-    if (GetValAt(r, site) == minorState) {
-      listRowsWMinor.insert(r);
-    }
-  }
+bool BinaryMatrix ::IsColComplement(int c1, int c2)
+{
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		//cout << "[i, c1] = " << rowsArray[i][c1] << ", rowsArray[i][c2] = " << rowsArray[i][c2] << endl;
+		if (rowsArray[i][c1] == rowsArray[i][c2])
+		{
+			return false;
+		}
+	}
+	//cout << "col " << c1 << ", " << c2 << " are compl.\n";
+	return true;
+}
+bool BinaryMatrix ::IsColDuplicate(int c1, int c2)
+{
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		if (rowsArray[i][c1] != rowsArray[i][c2])
+		{
+			return false;
+		}
+	}
+	//cout << "col " << c1 << ", " << c2 << " are identical.\n";
+	return true;
 }
 
-void BinaryMatrix ::GetRowsWithAllele(int site, int alleleState,
-                                      set<int> &setRows) const {
-  //
-  setRows.clear();
-  for (int r = 0; r < GetRowNum(); ++r) {
-    if (GetValAt(r, site) == alleleState) {
-      setRows.insert(r);
-    }
-  }
-}
-
-int BinaryMatrix ::GetTheOtherAllele(int allele) {
-  //
-  if (allele == 0) {
-    return 1;
-  } else {
-    return 0;
-  }
-}
-
-void BinaryMatrix ::ConfigZeroMajSeq() {
-  // make majority elem all-0 for each position
-  //
-  for (int c = 0; c < GetColNum(); ++c) {
-    int mc = GetMajorityState(c);
-    if (mc == 1) {
-      // switch it
-      for (int r = 0; r < GetRowNum(); ++r) {
-        //
-        if (GetValAt(r, c) == 0) {
-          rowsArray[r][c] = 1;
-        } else {
-          rowsArray[r][c] = 0;
-        }
-      }
-    }
-  }
-}
-
-void BinaryMatrix ::ConfigZeroAncesSeq(const vector<int> &seqAnces) {
-  // if seqAnces[i] = 1, then swap 0/1 in the matrix
-  YW_ASSERT_INFO((int)seqAnces.size() == GetColNum(), "Size: mismatch2");
-  for (int c = 0; c < GetColNum(); ++c) {
-    int mc = seqAnces[c];
-    if (mc == 1) {
-      // switch it
-      for (int r = 0; r < GetRowNum(); ++r) {
-        //
-        if (GetValAt(r, c) == 0) {
-          rowsArray[r][c] = 1;
-        } else {
-          rowsArray[r][c] = 0;
-        }
-      }
-    }
-  }
-}
-
-void BinaryMatrix ::DumpConvGenotypes() {
-  // for 00: 1
-  YW_ASSERT_INFO((GetRowNum() % 2) == 0,
-                 "To get genotypes, must have EVEN number of rows");
-
-  cout << "Converted genotype: " << GetRowNum() / 2 << " by " << GetColNum()
-       << " sites\n";
-
-  for (int i = 0; i < GetRowNum(); i += 2) {
-    for (int c = 0; c < GetColNum(); ++c) {
-      if (GetValAt(i, c) == 0 && GetValAt(i + 1, c) == 0) {
-        cout << "0";
-      } else if (GetValAt(i, c) == 1 && GetValAt(i + 1, c) == 1) {
-        cout << "1";
-      } else {
-        cout << "2";
-      }
-    }
-    cout << endl;
-  }
-}
-
-void BinaryMatrix ::GreedyRemoveIncompatSites(BinaryMatrix &matReduced) {
-  // greedily remove incompatible sites (i.e. first remove site that is
-  // incompatible w/ most sites and continue) approach: try to find some subset
-  // of columns that fits the perfect phylogeny; and use that to estimate the
-  // number of migrations hopefully this works reasonably well for low
-  // reombinaiton rates
-  vector<vector<bool> > listPairCompatibles;
-
-  //
-  listPairCompatibles.resize(this->GetColNum());
-  for (int s1 = 0; s1 < this->GetColNum(); ++s1) {
-    listPairCompatibles[s1].resize(this->GetColNum());
-    for (int s2 = s1 + 1; s2 < this->GetColNum(); ++s2) {
-      listPairCompatibles[s1][s2] = IsCompatible(s1, s2);
-    }
-  }
-  // keep track of which sites are incompaiblw with which
-  vector<set<int> > listIncompatSitesPerSite(this->GetColNum());
-  for (int s1 = 0; s1 < this->GetColNum(); ++s1) {
-    listPairCompatibles[s1].resize(this->GetColNum());
-    for (int s2 = s1 + 1; s2 < this->GetColNum(); ++s2) {
-      if (listPairCompatibles[s1][s2] == false) {
-        //
-        listIncompatSitesPerSite[s1].insert(s2);
-        listIncompatSitesPerSite[s2].insert(s1);
-      }
-    }
-  }
-  // cout << "List of incompatible sites: \n";
-  // for( int jj=0; jj<(int)listIncompatSitesPerSite.size(); ++jj )
-  //{
-  // cout << "site: " << jj << ": ";
-  // DumpIntSet(listIncompatSitesPerSite[jj]);
-  //}
-
-  // remove the matrix sites by dropping the one w/ largest incompatible pairs
-  // until all sites become compatible w/ each other
-  set<int> setChosenRemoveSites;
-  while (true) {
-    // find the site w/ largest incompat sites
-    vector<int> listIncSize;
-    for (int ii = 0; ii < (int)listIncompatSitesPerSite.size(); ++ii) {
-      listIncSize.push_back(listIncompatSitesPerSite[ii].size());
-    }
-    int sChosen = std::max_element(listIncSize.begin(), listIncSize.end()) -
-                  listIncSize.begin();
-    int siteChosen = sChosen;
-    if (listIncSize[siteChosen] == 0) {
-      // all remaining sites are compatible. Stop
-      break;
-    }
-    // cout << "List of inompat size: ";
-    // DumpIntVec(listIncSize);
-    // cout << "Choosen site: " << siteChosen << endl;
-
-    // add this site; then remove this site from each incomp site list
-    setChosenRemoveSites.insert(siteChosen);
-    listIncompatSitesPerSite[siteChosen].clear();
-    for (int jj = 0; jj < (int)listIncompatSitesPerSite.size(); ++jj) {
-      listIncompatSitesPerSite[jj].erase(siteChosen);
-    }
-  }
-  // cout << "List of sites to remove: ";
-  // DumpIntSet(setChosenRemoveSites);
-  //
-  vector<int> listKeptSites;
-  for (int s1 = 0; s1 < (int)this->GetColNum(); ++s1) {
-    //
-    if (setChosenRemoveSites.find(s1) == setChosenRemoveSites.end()) {
-      listKeptSites.push_back(s1);
-    }
-  }
-  YW_ASSERT_INFO(listKeptSites.size() > 0, "ListKeptSites: wrong");
-  SubMatrixSelectedSites(listKeptSites, matReduced);
-  // cout << "GreedyRemoveIncompatSites: original mat = ";
-  // this->Dump();
-  // cout << "After removing incompatible sites greedyly, matrix = ";
-  // matReduced.Dump();
-}
-
-void BinaryMatrix ::CalcSFS(vector<double> &listSFSFrac) const {
-  // compute SFS; that is, list[i] = frac of sites with minor allele (assumed to
-  // be 1) appears i times note: assume 0 is ancestral
-  listSFSFrac.clear();
-  int numRows = GetRowNum();
-  for (int r = 0; r <= numRows; ++r) {
-    listSFSFrac.push_back(0.0);
-  }
-  int numCols = GetColNum();
-  for (int s = 0; s < GetColNum(); ++s) {
-    //
-    int minorState;
-    int numTimes = GetMinorStateNum(s, minorState);
-    if (minorState == 0) {
-      //
-      numTimes = numRows - numTimes;
-    }
-    YW_ASSERT_INFO(numTimes >= 0 && numTimes <= numRows, "Wrong");
-    listSFSFrac[numTimes] += 1.0 / numCols;
-  }
+void BinaryMatrix ::GetAllIncompatiblePairs(set<pair<int, int>> &incompatibles)
+{
+	incompatibles.clear();
+	for (int i = 0; i < nCols; i++)
+	{
+		for (int j = i + 1; j < nCols; ++j)
+		{
+			// Test to see if site i, j are compatible
+			if (IsCompatible(i, j) == false)
+			{
+				pair<int, int> p(i, j);
+				incompatibles.insert(p);
+			}
+		}
+	}
 }
 
-int BinaryMatrix ::GetDiffSitesForTwoRows(int r1, int r2) const {
-  //
-  int res = 0;
-  for (int c = 0; c < GetColNum(); ++c) {
-    if (GetValAt(r1, c) != GetValAt(r2, c)) {
-      ++res;
-    }
-  }
-  return res;
+int BinaryMatrix ::ComputeHKBound()
+{
+	// The idea is to test for incompatible between each column
+	// Then create an incompatibility map, and compute the bound
+	map<INTERVAL, int> bounds;
+
+	int nCols = GetColNum();
+	int nRows = GetRowNum();
+	if (nCols <= 1 || nRows <= 3)
+	{
+		return 0;
+	}
+
+	for (int i = 0; i < nCols - 1; ++i)
+	{
+		for (int j = i + 1; j < nCols; ++j)
+		{
+			// Check if site i, j conflict
+			int val = 0;
+			if (IsCompatible(i, j) == false)
+			{
+				val = 1;
+			}
+			INTERVAL iv(i, j);
+			bounds.insert(map<INTERVAL, int>::value_type(iv, val));
+		}
+	}
+	vector<int> locBreakpoints; // do not really need this, but...
+	return CalcCompositeBound(bounds, 0, nCols - 1, locBreakpoints);
+}
+
+int BinaryMatrix ::ComputeFastHapBound()
+{
+	// Simply test for each submatrix for a rough haplotype bound
+	// Then create an incompatibility map, and compute the bound
+	// To speed things up, we do not perform optimal RecMin
+	// Rather simply no-subset
+
+	map<INTERVAL, int> bounds;
+
+	int nc = GetColNum();
+	int nr = GetRowNum();
+	if (nc <= 1 || nr <= 3)
+	{
+		return 0;
+	}
+
+	for (int i = 0; i < nc - 1; ++i)
+	{
+		for (int j = i + 1; j < nc; ++j)
+		{
+			// Check if site i, j conflict
+			int val = 0;
+
+			BinaryMatrix submat;
+			SubMatrix(0, GetRowNum() - 1, i, j, submat);
+			submat.TrimFullyCompatibleSites();
+			submat.TrimDupRows();
+
+			val = submat.GetRowNum() - submat.GetColNum() - 1;
+			if (val < 0)
+			{
+				val = 0;
+			}
+
+			INTERVAL iv(i, j);
+			bounds.insert(map<INTERVAL, int>::value_type(iv, val));
+			//cout << "interval " << i << ", " << j  << " quick bd = " << val << endl;
+		}
+	}
+	vector<int> locBreakpoints; // do not really need this, but...
+	return CalcCompositeBound(bounds, 0, nc - 1, locBreakpoints);
+}
+
+// This function computes a fast recombination upper bound, which can be useful in applications like branch and bound
+// The idea is to remove one sequence from inputmat at a time, and take the min to recombine them
+int BinaryMatrix ::ComputeFastRecombUpperBound()
+{
+	// Create a new sequence for operation
+	BinaryMatrix matToOp = *this;
+
+	int res = 0;
+	// Whenever the matrix is too small, we stop
+	while (true)
+	{
+		// First perform cleanup: drop non-informatives rows, collapse identical rows
+		set<int> setOfRemoved;
+		matToOp.TrimFullyCompatibleSites(&setOfRemoved);
+		matToOp.FindNgbrDupCompSites(&setOfRemoved);
+		matToOp.RemoveColumns(setOfRemoved);
+		matToOp.TrimDupRows();
+
+		if (matToOp.GetRowNum() <= 3)
+		{
+			break;
+		}
+
+		// Find the smallest cost row
+		int minRmCost = HAP_MAX_INT;
+		int minRow = -1;
+		// Try every leftover row in matToOp
+		for (int r = 0; r < matToOp.GetRowNum(); ++r)
+		{
+			//            SEQUENCE row;
+			//            matToOp.GetRow( r, row );
+			int recCost = matToOp.ComputeMinRecombWeight(r);
+			if (recCost < minRmCost)
+			{
+				minRmCost = recCost;
+				minRow = r;
+			}
+		}
+		YW_ASSERT_INFO(minRow >= 0, "Error: minRow must be updated at least once.");
+		//cout << "minRmCost = " << minRmCost << ", minRow = " << minRow << endl;
+		// Now we remove this sequence
+		res += minRmCost;
+		set<int> seqsToRemove;
+		seqsToRemove.insert(minRow);
+		matToOp.RemoveRows(seqsToRemove);
+	}
+	//cout << "A fast recomb. upper bound = " << res << endl;
+	return res;
+}
+
+int BinaryMatrix ::ComputeMinRecombWeight(int rowIndex)
+{
+	// This function computes a recombination number given the rows in matrix
+	// that are ancestors of rowIndex
+	// This function computes the minimum recombination weight for the given hapRow
+	// when restricted to interval [left, right] in mat
+	int res = 0;
+	//cout << "ComputeMinRecombWeight :: rowIndex = " << rowIndex << endl;
+	//cout <<"matrix here is: ";
+	//Dump();
+	set<int> lastTrackRows; // set of rows that matching the hapRow
+
+	// Initially every row is a match
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		if (i != rowIndex)
+		{
+			lastTrackRows.insert(i);
+		}
+	}
+
+	for (int curpos = 0; curpos < GetColNum(); ++curpos)
+	{
+		// Each time, we intersect the set with the sets matching the current bit
+		set<int> trackRows;
+		for (int i = 0; i < GetRowNum(); ++i)
+		{
+			if (i == rowIndex)
+			{
+				continue;
+			}
+
+			if (GetValAt(i, curpos) == GetValAt(rowIndex, curpos))
+			{
+				// Yes, this row matches
+				trackRows.insert(i);
+			}
+		}
+		YW_ASSERT_INFO(trackRows.size() > 0, "trackRows must contain some rows.");
+
+		// Now we test if there is an intersection; if it is non-empty, we continue
+		set<int> sint;
+		JoinSets(trackRows, lastTrackRows, sint);
+		if (sint.size() == 0)
+		{
+			// No intersection, so we have to increase the result (we know there must be one recomb
+			// here, from the right-maximal proof)
+			++res;
+
+			// Re-initialize lastTrackRows here
+			lastTrackRows = trackRows;
+			//            PopulateSetWithInterval( lastTrackRows, 0, mat.size() - 1 );
+		}
+		else
+		{
+			// In this case, we still continue
+			lastTrackRows = sint;
+		}
+	}
+
+	//cout << "Min recomb = " << res << endl;
+	return res;
+}
+
+int BinaryMatrix ::GetMajorityState(int site)
+{ // Return the majority state (0 or 1) of column site
+	int res = 0; // number of rows holding 0 at this site
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		if (GetValAt(r, site) == 0)
+		{
+			res++;
+		}
+	}
+	if (res >= (GetRowNum() + 1) / 2) // 0 wins when it fills at least half of the rows (rounded up), so a tie goes to 0
+	{
+		return 0;
+	}
+	else
+	{
+		return 1; // every entry other than 0 counts against state 0
+	}
+}
+
+int BinaryMatrix ::GetMinorStateNum(int site, int &minorState) const
+{ // Return how many rows carry the minority state at column site; the state itself is written to minorState
+	int res = 0; // number of rows holding 0 at this site
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		if (GetValAt(r, site) == 0)
+		{
+			res++;
+		}
+	}
+	if (res >= (GetRowNum() + 1) / 2) // 0 is the majority (ties included), so 1 is the minor state
+	{
+		minorState = 1;
+		return GetRowNum() - res; // every row not holding 0 is counted as minor
+	}
+	else
+	{
+		minorState = 0;
+		return res;
+	}
+}
+
+void BinaryMatrix ::GetMinorStateRows(int site, int &minorState, set<int> &listRowsWMinor) const
+{ // Write the minor state of column site to minorState and add the rows carrying it to listRowsWMinor
+	GetMinorStateNum(site, minorState); // only the state is needed; the returned count is ignored
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		if (GetValAt(r, site) == minorState)
+		{
+			listRowsWMinor.insert(r); // NOTE: the set is not cleared first, so entries already in it are kept
+		}
+	}
+}
+
+void BinaryMatrix ::GetRowsWithAllele(int site, int alleleState, set<int> &setRows) const
+{
+	// Replace the contents of setRows with the indices of all rows whose value at column site equals alleleState
+	setRows.clear();
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		if (GetValAt(r, site) == alleleState)
+		{
+			setRows.insert(r);
+		}
+	}
+}
+
+int BinaryMatrix ::GetTheOtherAllele(int allele)
+{
+	// Flip a binary allele: 0 maps to 1, and every other value maps to 0
+	if (allele == 0)
+	{
+		return 1;
+	}
+	else
+	{
+		return 0;
+	}
+}
+
+void BinaryMatrix ::ConfigZeroMajSeq()
+{
+	// make the majority element 0 at each position:
+	// every column whose majority state is 1 has its values flipped in place
+	for (int c = 0; c < GetColNum(); ++c)
+	{
+		int mc = GetMajorityState(c);
+		if (mc == 1)
+		{
+			// switch it
+			for (int r = 0; r < GetRowNum(); ++r)
+			{
+				// 0 becomes 1; any other value becomes 0
+				if (GetValAt(r, c) == 0)
+				{
+					rowsArray[r][c] = 1;
+				}
+				else
+				{
+					rowsArray[r][c] = 0;
+				}
+			}
+		}
+	}
+}
+
+void BinaryMatrix ::ConfigZeroAncesSeq(const vector<int> &seqAnces)
+{
+	// if seqAnces[c] == 1, then swap 0/1 in column c of the matrix (in place); seqAnces needs one entry per column
+	YW_ASSERT_INFO((int)seqAnces.size() == GetColNum(), "Size: mismatch2");
+	for (int c = 0; c < GetColNum(); ++c)
+	{
+		int mc = seqAnces[c];
+		if (mc == 1)
+		{
+			// switch it
+			for (int r = 0; r < GetRowNum(); ++r)
+			{
+				// 0 becomes 1; any other value becomes 0
+				if (GetValAt(r, c) == 0)
+				{
+					rowsArray[r][c] = 1;
+				}
+				else
+				{
+					rowsArray[r][c] = 0;
+				}
+			}
+		}
+	}
+}
+
+void BinaryMatrix ::DumpConvGenotypes()
+{
+	// Print genotypes to cout, one per row pair (i, i + 1): 0/0 prints 0, 1/1 prints 1, anything else prints 2
+	YW_ASSERT_INFO((GetRowNum() % 2) == 0, "To get genotypes, must have EVEN number of rows");
+
+	cout << "Converted genotype: " << GetRowNum() / 2 << " by " << GetColNum() << " sites\n";
+
+	for (int i = 0; i < GetRowNum(); i += 2)
+	{
+		for (int c = 0; c < GetColNum(); ++c)
+		{
+			if (GetValAt(i, c) == 0 && GetValAt(i + 1, c) == 0)
+			{
+				cout << "0";
+			}
+			else if (GetValAt(i, c) == 1 && GetValAt(i + 1, c) == 1)
+			{
+				cout << "1";
+			}
+			else
+			{
+				cout << "2";
+			}
+		}
+		cout << endl;
+	}
+}
+
+void BinaryMatrix ::GreedyRemoveIncompatSites(BinaryMatrix &matReduced)
+{
+	// greedily remove incompatible sites (i.e. first remove the site that is incompatible w/ most sites and continue)
+	// approach: try to find some subset of columns that fits the perfect phylogeny; and use that to estimate the number of migrations
+	// hopefully this works reasonably well for low recombination rates
+	vector<vector<bool>> listPairCompatibles;
+
+	// pairwise compatibility table; only the entries with s1 < s2 are computed
+	listPairCompatibles.resize(this->GetColNum());
+	for (int s1 = 0; s1 < this->GetColNum(); ++s1)
+	{
+		listPairCompatibles[s1].resize(this->GetColNum());
+		for (int s2 = s1 + 1; s2 < this->GetColNum(); ++s2)
+		{
+			listPairCompatibles[s1][s2] = IsCompatible(s1, s2);
+		}
+	}
+	// keep track of which sites are incompatible with which
+	vector<set<int>> listIncompatSitesPerSite(this->GetColNum());
+	for (int s1 = 0; s1 < this->GetColNum(); ++s1)
+	{
+		listPairCompatibles[s1].resize(this->GetColNum()); // no effect: the row already has this size
+		for (int s2 = s1 + 1; s2 < this->GetColNum(); ++s2)
+		{
+			if (listPairCompatibles[s1][s2] == false)
+			{
+				// record the conflict in both directions
+				listIncompatSitesPerSite[s1].insert(s2);
+				listIncompatSitesPerSite[s2].insert(s1);
+			}
+		}
+	}
+	//cout << "List of incompatible sites: \n";
+	//for( int jj=0; jj<(int)listIncompatSitesPerSite.size(); ++jj )
+	//{
+	//cout << "site: " << jj << ": ";
+	//DumpIntSet(listIncompatSitesPerSite[jj]);
+	//}
+
+	// remove the matrix sites by dropping the one w/ the most incompatible pairs until all sites become compatible w/ each other
+	set<int> setChosenRemoveSites;
+	while (true)
+	{
+		// find the site w/ the most incompatible sites (std::max_element returns the first maximum, so ties go to the lowest index)
+		vector<int> listIncSize;
+		for (int ii = 0; ii < (int)listIncompatSitesPerSite.size(); ++ii)
+		{
+			listIncSize.push_back(listIncompatSitesPerSite[ii].size());
+		}
+		int sChosen = std::max_element(listIncSize.begin(), listIncSize.end()) - listIncSize.begin(); // NOTE(review): assumes at least one column; with none, listIncSize is empty
+		int siteChosen = sChosen;
+		if (listIncSize[siteChosen] == 0)
+		{
+			// all remaining sites are compatible. Stop
+			break;
+		}
+		//cout << "List of inompat size: ";
+		//DumpIntVec(listIncSize);
+		//cout << "Choosen site: " << siteChosen << endl;
+
+		// add this site; then remove this site from each incompatible-site list
+		setChosenRemoveSites.insert(siteChosen);
+		listIncompatSitesPerSite[siteChosen].clear();
+		for (int jj = 0; jj < (int)listIncompatSitesPerSite.size(); ++jj)
+		{
+			listIncompatSitesPerSite[jj].erase(siteChosen);
+		}
+	}
+	//cout << "List of sites to remove: ";
+	//DumpIntSet(setChosenRemoveSites);
+	// collect the sites that were not chosen for removal, in increasing order
+	vector<int> listKeptSites;
+	for (int s1 = 0; s1 < (int)this->GetColNum(); ++s1)
+	{
+		// keep s1 unless it was chosen for removal
+		if (setChosenRemoveSites.find(s1) == setChosenRemoveSites.end())
+		{
+			listKeptSites.push_back(s1);
+		}
+	}
+	YW_ASSERT_INFO(listKeptSites.size() > 0, "ListKeptSites: wrong");
+	SubMatrixSelectedSites(listKeptSites, matReduced);
+	//cout << "GreedyRemoveIncompatSites: original mat = ";
+	//this->Dump();
+	//cout << "After removing incompatible sites greedyly, matrix = ";
+	//matReduced.Dump();
+}
+
+void BinaryMatrix ::CalcSFS(vector<double> &listSFSFrac) const
+{
+	// compute SFS; that is, listSFSFrac[i] = fraction of sites at which the non-0 allele appears in i rows (i = 0..number of rows)
+	// note: assume 0 is ancestral
+	listSFSFrac.clear();
+	int numRows = GetRowNum();
+	for (int r = 0; r <= numRows; ++r)
+	{
+		listSFSFrac.push_back(0.0);
+	}
+	int numCols = GetColNum();
+	for (int s = 0; s < GetColNum(); ++s)
+	{
+		// GetMinorStateNum counts the rows carrying the minor state, whichever state that is
+		int minorState;
+		int numTimes = GetMinorStateNum(s, minorState);
+		if (minorState == 0)
+		{
+			// the count is of 0s here; convert it into the count of non-0 entries
+			numTimes = numRows - numTimes;
+		}
+		YW_ASSERT_INFO(numTimes >= 0 && numTimes <= numRows, "Wrong");
+		listSFSFrac[numTimes] += 1.0 / numCols;
+	}
+}
+
+int BinaryMatrix ::GetDiffSitesForTwoRows(int r1, int r2) const
+{
+	// Return the number of columns at which rows r1 and r2 hold different values
+	int res = 0;
+	for (int c = 0; c < GetColNum(); ++c)
+	{
+		if (GetValAt(r1, c) != GetValAt(r2, c))
+		{
+			++res;
+		}
+	}
+	return res;
 }
 
-double BinaryMatrix ::CalcAvePairRowsDiff() const {
-  // average pairwise diff (normalized by row length)
+double BinaryMatrix ::CalcAvePairRowsDiff() const
+{
+	// average pairwise diff (normalized by row length)
 #if 0
     double res = 0.0;
     int numPairs = 0;
@@ -1244,239 +1507,255 @@ double BinaryMatrix ::CalcAvePairRowsDiff() const {
     return res/( GetColNum()* numPairs);
 #endif
 
-  // use a faster approach
-  // first accumlate the num of 1s in the first i rows at each site
-  vector<vector<int> > vecNum1sAtSites(GetRowNum());
-  for (int r = 0; r < (int)vecNum1sAtSites.size(); ++r) {
-    // accumlate for each col
-    for (int c = 0; c < GetColNum(); ++c) {
-      int num1s = 0;
-      if (r > 0) {
-        num1s = vecNum1sAtSites[r - 1][c];
-      }
-      if (GetValAt(r, c) == 1) {
-        ++num1s;
-      }
-      vecNum1sAtSites[r].push_back(num1s);
-    }
-  }
-  // now accumate diffs
-  double totDiffs = 0.0;
-  for (int r = 1; r < GetRowNum(); ++r) {
-    // calc tot diffs here
-    for (int c = 0; c < GetColNum(); ++c) {
-      int stepVal = 0;
-      if (GetValAt(r, c) == 0) {
-        stepVal = vecNum1sAtSites[r - 1][c];
-      } else {
-        stepVal = r - vecNum1sAtSites[r - 1][c];
-      }
-      YW_ASSERT_INFO(stepVal >= 0, "Cannot be negative");
-      totDiffs += stepVal;
-    }
-  }
-  int numPairs = GetRowNum() * (GetRowNum() - 1) / 2;
-  return totDiffs / (GetColNum() * numPairs);
-}
-
-double BinaryMatrix ::CalcAvePairRowsDiffBetween(const set<int> &rowsSet1,
-                                                 const set<int> &rowsSet2,
-                                                 double &resMinDiffOut) const {
-  //
-  double res = 0.0;
-  int numPairs = 0;
-  double resMaxDiff = GetColNum();
-  for (set<int>::iterator it1 = rowsSet1.begin(); it1 != rowsSet1.end();
-       ++it1) {
-    int r1 = *it1;
-    for (set<int>::iterator it2 = rowsSet2.begin(); it2 != rowsSet2.end();
-         ++it2) {
-      int r2 = *it2;
-      ++numPairs;
-      int valdiff = GetDiffSitesForTwoRows(r1, r2);
-      res += valdiff;
-      if (resMaxDiff > valdiff) {
-        resMaxDiff = valdiff;
-      }
-    }
-  }
-  resMinDiffOut = resMaxDiff / GetColNum();
-  return res / (GetColNum() * numPairs);
-}
-
-void BinaryMatrix ::CollectAllPairwiseDiffs(
-    const set<int> &rowsSet1, const set<int> &rowsSet2,
-    vector<double> &listRowPairsDiff) const {
-  //
-  listRowPairsDiff.clear();
-  for (set<int>::iterator it1 = rowsSet1.begin(); it1 != rowsSet1.end();
-       ++it1) {
-    int r1 = *it1;
-    for (set<int>::iterator it2 = rowsSet2.begin(); it2 != rowsSet2.end();
-         ++it2) {
-      int r2 = *it2;
-      int valdiff = GetDiffSitesForTwoRows(r1, r2);
-      listRowPairsDiff.push_back(((double)valdiff) / GetColNum());
-    }
-  }
-  // sort results
-  SortDoubleVec(listRowPairsDiff);
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-void GetNoninformativeRowsInMat(const BinaryMatrix &mat, set<int> &trimedRows,
-                                vector<REMOVED_ROWS_INFO> &trimedRowInfo,
-                                set<int> &trimedCols, BinaryMatrix &matUpdated,
-                                bool fRmDup) {
-  //
-  BinaryMatrix matUse = mat;
-
-  //
-  trimedRows.clear();
-  trimedRowInfo.clear();
-
-  // we perform trimming dup rows and then noninformative columns, repeatively.
-  // Stop when there is no more work to do
-  vector<int> curRowsRemoved;
-  vector<int> curColsRemoved;
-  while (true) {
-    // cout << "cur mat = ";
-    // matUse.Dump();
-    // simply check to see if anything can be done
-    // first remove non-inform sites
-    //
-
-    set<int> removedCols;
-    matUse.FindNonInformativeSites(removedCols);
-    // cout << "Removed cols = ";
-    // DumpIntSet( removedCols );
-
-    // now also remove dup sites
-    if (fRmDup == true) {
-      set<int> sitesDupRm;
-      matUse.FindNgbrDupCompSites(&sitesDupRm);
-      // cout << "Dup sites removed: ";
-      // DumpIntSet( sitesDupRm );
-      // also remember sites being trimmed
-      UnionSets(removedCols, sitesDupRm);
-    }
+	// use a faster approach
+	// first accumulate the number of 1s in rows 0..r at each site
+	vector<vector<int>> vecNum1sAtSites(GetRowNum());
+	for (int r = 0; r < (int)vecNum1sAtSites.size(); ++r)
+	{
+		// accumulate for each col
+		for (int c = 0; c < GetColNum(); ++c)
+		{
+			int num1s = 0;
+			if (r > 0)
+			{
+				num1s = vecNum1sAtSites[r - 1][c];
+			}
+			if (GetValAt(r, c) == 1)
+			{
+				++num1s;
+			}
+			vecNum1sAtSites[r].push_back(num1s);
+		}
+	}
+	// now accumulate diffs
+	double totDiffs = 0.0;
+	for (int r = 1; r < GetRowNum(); ++r)
+	{
+		// calc tot diffs here
+		for (int c = 0; c < GetColNum(); ++c)
+		{
+			int stepVal = 0;
+			if (GetValAt(r, c) == 0)
+			{
+				stepVal = vecNum1sAtSites[r - 1][c];
+			}
+			else
+			{
+				stepVal = r - vecNum1sAtSites[r - 1][c];
+			}
+			YW_ASSERT_INFO(stepVal >= 0, "Cannot be negative");
+			totDiffs += stepVal;
+		}
+	}
+	int numPairs = GetRowNum() * (GetRowNum() - 1) / 2;
+	return totDiffs / (GetColNum() * numPairs);
+}
 
-    // now removed stuff
-    matUse.RemoveColumns(removedCols);
+double BinaryMatrix ::CalcAvePairRowsDiffBetween(const set<int> &rowsSet1, const set<int> &rowsSet2, double &resMinDiffOut) const
+{
+	// Return the average fraction of differing sites over all pairs (r1 in rowsSet1, r2 in rowsSet2); resMinDiffOut gets the smallest such fraction
+	double res = 0.0; // sum of the differing-site counts over all pairs
+	int numPairs = 0; // stays 0 if either set is empty; the final division is then 0.0 / 0
+	double resMinDiff = GetColNum(); // smallest per-pair count so far; starts at the largest possible count
+	for (set<int>::iterator it1 = rowsSet1.begin(); it1 != rowsSet1.end(); ++it1)
+	{
+		int r1 = *it1;
+		for (set<int>::iterator it2 = rowsSet2.begin(); it2 != rowsSet2.end(); ++it2)
+		{
+			int r2 = *it2;
+			++numPairs;
+			int valdiff = GetDiffSitesForTwoRows(r1, r2);
+			res += valdiff;
+			if (resMinDiff > valdiff)
+			{
+				resMinDiff = valdiff;
+			}
+		}
+	}
+	resMinDiffOut = resMinDiff / GetColNum();
+	return res / ((double)GetColNum() * numPairs); // multiply in double: the int product columns * pairs can overflow for large inputs
+}
 
-    if (removedCols.size() > 0) {
-      // do the same for cols
-      vector<int> remColsVec;
-      PopulateVecBySet(remColsVec, removedCols);
-      vector<int> posOrigCol;
-      RecoverOrigIndicesAfterDeletion(curColsRemoved, remColsVec, posOrigCol);
-      AppendIntVec(curColsRemoved, posOrigCol);
-    }
+void BinaryMatrix ::CollectAllPairwiseDiffs(const set<int> &rowsSet1, const set<int> &rowsSet2, vector<double> &listRowPairsDiff) const
+{
+	// Fill listRowPairsDiff (cleared first) with the fraction of differing sites of every pair (r1 in rowsSet1, r2 in rowsSet2)
+	listRowPairsDiff.clear();
+	for (set<int>::iterator it1 = rowsSet1.begin(); it1 != rowsSet1.end(); ++it1)
+	{
+		int r1 = *it1;
+		for (set<int>::iterator it2 = rowsSet2.begin(); it2 != rowsSet2.end(); ++it2)
+		{
+			int r2 = *it2;
+			int valdiff = GetDiffSitesForTwoRows(r1, r2);
+			listRowPairsDiff.push_back(((double)valdiff) / GetColNum());
+		}
+	}
+	// sort results (SortDoubleVec is defined elsewhere; presumably ascending order - TODO confirm)
+	SortDoubleVec(listRowPairsDiff);
+}
 
-    // now see if anything becomes identical
-    set<int> removedRows;
-    vector<pair<int, int> > listRowRemInfo;
-    matUse.TrimDupRows(&removedRows, &listRowRemInfo);
-    // cout << "Trimmed rows = ";
-    // DumpIntSet(removedRows);
-    // for(int jjj=0; jjj<(int)listRowRemInfo.size(); ++jjj)
-    //{
-    // cout << "Deleting row " << listRowRemInfo[jjj].first << ", since exists a
-    // duplicate " << listRowRemInfo[jjj].second << endl;
-    //}
-    // remember which rows are rmeoved and which row it gets its value from
-    if (removedRows.size() > 0) {
-      REMOVED_ROWS_INFO rri;
-      rri.rowsRemoved = removedRows;
-      rri.pairsRmKeepRows = listRowRemInfo;
-      trimedRowInfo.push_back(rri);
-    }
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+void GetNoninformativeRowsInMat(const BinaryMatrix &mat, set<int> &trimedRows, vector<REMOVED_ROWS_INFO> &trimedRowInfo,
+								set<int> &trimedCols, BinaryMatrix &matUpdated, bool fRmDup)
+{
+	// Trim a working copy of mat (mat itself is not modified); the trimmed copy is returned in matUpdated
+	BinaryMatrix matUse = mat;
+
+	// reset the row outputs; trimedCols is only written at the very end, through PopulateSetByVec
+	trimedRows.clear();
+	trimedRowInfo.clear();
+
+	// each round trims the non-informative columns and then the duplicate rows, repeatedly.
+	// Stop when a round finds no duplicate row
+	vector<int> curRowsRemoved;
+	vector<int> curColsRemoved;
+	while (true)
+	{
+		//cout << "cur mat = ";
+		//matUse.Dump();
+		// simply check to see if anything can be done
+		// first remove non-informative sites
+		// (when fRmDup is set, the sites reported by FindNgbrDupCompSites are removed as well)
+
+		set<int> removedCols;
+		matUse.FindNonInformativeSites(removedCols);
+		//cout << "Removed cols = ";
+		//DumpIntSet( removedCols );
+
+		// now also remove dup sites
+		if (fRmDup == true)
+		{
+			set<int> sitesDupRm;
+			matUse.FindNgbrDupCompSites(&sitesDupRm);
+			//cout << "Dup sites removed: ";
+			//DumpIntSet( sitesDupRm );
+			// also remember sites being trimmed
+			UnionSets(removedCols, sitesDupRm);
+		}
+
+		// now remove the collected columns
+		matUse.RemoveColumns(removedCols);
+
+		if (removedCols.size() > 0)
+		{
+			// record the removed columns; presumably RecoverOrigIndicesAfterDeletion maps them back to indices in mat - TODO confirm
+			vector<int> remColsVec;
+			PopulateVecBySet(remColsVec, removedCols);
+			vector<int> posOrigCol;
+			RecoverOrigIndicesAfterDeletion(curColsRemoved, remColsVec, posOrigCol);
+			AppendIntVec(curColsRemoved, posOrigCol);
+		}
+
+		// now see if any rows have become identical
+		set<int> removedRows;
+		vector<pair<int, int>> listRowRemInfo;
+		matUse.TrimDupRows(&removedRows, &listRowRemInfo);
+		//cout << "Trimmed rows = ";
+		//DumpIntSet(removedRows);
+		//for(int jjj=0; jjj<(int)listRowRemInfo.size(); ++jjj)
+		//{
+		//cout << "Deleting row " << listRowRemInfo[jjj].first << ", since exists a duplicate " << listRowRemInfo[jjj].second << endl;
+		//}
+		// remember which rows are removed and which row each gets its value from (one entry per round that removed rows)
+		if (removedRows.size() > 0)
+		{
+			REMOVED_ROWS_INFO rri;
+			rri.rowsRemoved = removedRows;
+			rri.pairsRmKeepRows = listRowRemInfo;
+			trimedRowInfo.push_back(rri);
+		}
+
+		// stop once a round removes no row (the columns removed in this round were already recorded above)
+		if (removedRows.size() == 0)
+		{
+			break;
+		}
+		//cout << "Removed these rows: ";
+		//DumpIntSet(removedRows);
+		// save the removed rows, handled the same way as the removed columns above
+		vector<int> remRowsVec;
+		PopulateVecBySet(remRowsVec, removedRows);
+
+		vector<int> posOrig;
+		RecoverOrigIndicesAfterDeletion(curRowsRemoved, remRowsVec, posOrig);
+
+		// append finally
+		AppendIntVec(curRowsRemoved, posOrig);
+	}
+	//cout << "Finally, removed rows are: ";
+	//DumpIntVec(curRowsRemoved);
+	//cout << "Finally, removed cols are: ";
+	//DumpIntVec(curColsRemoved);
+	// after trimming redundant rows
+	//cout << "After trimming, matrix rows = ";
+	//matUse.Dump();
+
+	// convert to set
+	PopulateSetByVec(trimedRows, curRowsRemoved);
+
+	// the other outputs: the trimmed matrix and the set of removed columns
+	matUpdated = matUse;
+	PopulateSetByVec(trimedCols, curColsRemoved);
+}
 
-    // stop if found nothing
-    if (removedRows.size() == 0) {
-      break;
-    }
-    // cout << "Removed these rows: ";
-    // DumpIntSet(removedRows);
-    // save it
-    vector<int> remRowsVec;
-    PopulateVecBySet(remRowsVec, removedRows);
-
-    vector<int> posOrig;
-    RecoverOrigIndicesAfterDeletion(curRowsRemoved, remRowsVec, posOrig);
-
-    // append finally
-    AppendIntVec(curRowsRemoved, posOrig);
-  }
-  // cout << "Finally, removed rows are: ";
-  // DumpIntVec(curRowsRemoved);
-  // cout << "Finally, removed cols are: ";
-  // DumpIntVec(curColsRemoved);
-  // after trimming redundent rows
-  // cout << "After trimming, matrix rows = ";
-  // matUse.Dump();
-
-  // conver to set
-  PopulateSetByVec(trimedRows, curRowsRemoved);
-
-  // also other output
-  matUpdated = matUse;
-  PopulateSetByVec(trimedCols, curColsRemoved);
-}
-
-void SplitMatrixIntoMaximalFullyCompatRegs(
-    const BinaryMatrix &mat, vector<pair<int, int> > &listFullyCompatRegs) {
-  BinaryMatrix &matInst = const_cast<BinaryMatrix &>(mat);
-
-  // divide a (potentially very large) matrix into maximal fully compatible
-  // regions
-  int posLeft = 0;
-  int posCur = posLeft + 1;
-  while (posCur < mat.GetColNum()) {
-    // check compaibility for all previous ones
-    bool fFullyCompat = true;
-    for (int c = posLeft; c < posCur; ++c) {
-      if (matInst.IsCompatible(c, posCur) == false) {
-        fFullyCompat = false;
-        break;
-      }
-    }
-    if (fFullyCompat == false) {
-      pair<int, int> pp(posLeft, posCur - 1);
-      listFullyCompatRegs.push_back(pp);
-      posLeft = posCur;
-    }
-    ++posCur;
-  }
-  // add last segment if remain a lst one
-  pair<int, int> pp(posLeft, mat.GetColNum() - 1);
-  listFullyCompatRegs.push_back(pp);
-}
-
-void ReadSitePosFromFirstRowInFile(const char *filename, int numSites,
-                                   vector<double> &listSitePos) {
-  //
-  ifstream inFile(filename);
-  if (!inFile) {
-    YW_ASSERT_INFO(false, "Fatal error: cannot open the file");
-  }
-  string whitespace = " ";
-  int MAX_NUM_SITES = 102400;
-  const int BUF_SZ = MAX_NUM_SITES * sizeof(int);
-  char buf[BUF_SZ];
-  inFile.getline(buf, BUF_SZ);
-  string strbuf(buf);
-  size_t strEnd = strbuf.find_last_not_of(whitespace);
-  strbuf = strbuf.substr(0, strEnd);
-  std::istringstream is(strbuf);
-  listSitePos.clear();
-  while (is.eof() == false) {
-    double pos;
-    is >> pos;
-    listSitePos.push_back(pos);
-  }
-  // cout << "numSites: " << numSites << endl;
-  // cout << "ListSitePos: " << listSitePos.size() << " ";
-  // DumpDoubleVec(listSitePos);
-  YW_ASSERT_INFO((int)listSitePos.size() == numSites, "Wrong");
+void SplitMatrixIntoMaximalFullyCompatRegs(const BinaryMatrix &mat, vector<pair<int, int>> &listFullyCompatRegs)
+{
+	BinaryMatrix &matInst = const_cast<BinaryMatrix &>(mat); // IsCompatible is declared non-const, hence the cast
+
+	// divide a (potentially very large) matrix into maximal fully compatible regions, appended to the list as inclusive (left, right) column pairs; the list is not cleared first
+	int posLeft = 0;
+	int posCur = posLeft + 1;
+	while (posCur < mat.GetColNum())
+	{
+		// check compatibility of posCur with all previous columns of the current region
+		bool fFullyCompat = true;
+		for (int c = posLeft; c < posCur; ++c)
+		{
+			if (matInst.IsCompatible(c, posCur) == false)
+			{
+				fFullyCompat = false;
+				break;
+			}
+		}
+		if (fFullyCompat == false)
+		{
+			pair<int, int> pp(posLeft, posCur - 1);
+			listFullyCompatRegs.push_back(pp);
+			posLeft = posCur;
+		}
+		++posCur;
+	}
+	// always add the last segment, from posLeft to the last column; NOTE: for a matrix without columns this is (0, -1)
+	pair<int, int> pp(posLeft, mat.GetColNum() - 1);
+	listFullyCompatRegs.push_back(pp);
+}
+
+void ReadSitePosFromFirstRowInFile(const char *filename, int numSites, vector<double> &listSitePos)
+{
+	// Read the first line of filename as blank-separated site positions into listSitePos
+	// (cleared first) and assert that exactly numSites values were found.
+	ifstream inFile(filename);
+	if (!inFile)
+	{
+		YW_ASSERT_INFO(false, "Fatal error: cannot open the file");
+	}
+	// std::getline sizes the string itself, so there is neither a fixed-size stack
+	// buffer nor a limit on the line length
+	string strbuf;
+	std::getline(inFile, strbuf);
+	// trim trailing blanks but keep the last non-blank character (hence the + 1);
+	// for an empty or all-blank line, npos + 1 wraps to 0 and the result is empty
+	string whitespace = " ";
+	size_t strEnd = strbuf.find_last_not_of(whitespace);
+	strbuf = strbuf.substr(0, strEnd + 1);
+	std::istringstream is(strbuf);
+	listSitePos.clear();
+	// loop on extraction success so that a failed read never appends a bogus value
+	double pos;
+	while (is >> pos)
+	{
+		listSitePos.push_back(pos);
+	}
+	// the first row must provide one position per site
+	YW_ASSERT_INFO((int)listSitePos.size() == numSites, "Wrong");
 }
diff --git a/trisicell/external/scistree/BinaryMatrix.h b/trisicell/external/scistree/BinaryMatrix.h
index a81bc78..69b7079 100644
--- a/trisicell/external/scistree/BinaryMatrix.h
+++ b/trisicell/external/scistree/BinaryMatrix.h
@@ -1,141 +1,127 @@
 #ifndef BINARY_MATRIX_H
 #define BINARY_MATRIX_H
 
-#include <fstream>
-#include <iostream>
 #include <list>
-#include <map>
+#include <vector>
 #include <set>
 #include <string>
-#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
 using namespace std;
 
 #include "BioSequenceMatrix.h"
-#include "UnWeightedGraph.h"
 #include "Utils3.h"
+#include "UnWeightedGraph.h"
 
-typedef vector<set<int> > COLUMN_EQUIV_CLASS;
+typedef vector<set<int>> COLUMN_EQUIV_CLASS;
 
 // ***************************************************************************
 // Define a reusable binary matrix class
 // ***************************************************************************
-class BinaryMatrix : public BioSequenceMatrix {
+class BinaryMatrix : public BioSequenceMatrix
+{
 public:
-  BinaryMatrix();
-  ~BinaryMatrix();
-  BinaryMatrix(int nr, int nc);
-
-  // Support assignment/copy constructor
-  BinaryMatrix(const BinaryMatrix &rhs);
-  BinaryMatrix &operator=(const BinaryMatrix &rhs);
-
-  // Important interface functions we need
-  virtual bool IsDataValid(int val); // check to see if this data is good for
-                                     // this class e.g. for genotype data, 0, 1,
-                                     // 2
-
-  // Matrix editing functions specific to Binary (i.e. haplotype) Matrix
-  void TrimDupSites(set<int> *pRemovedSites = NULL, bool fTrimSubsumed = false);
-  int FindDupRow();
-  void FindNonInformativeSites(set<int> &sitesNoinfo);
-  bool TrimNonInformativeSites(set<int> *pRemovedSet = NULL);
-  void TrimUniformSites(set<int> *pRemovedSet = NULL);
-  void FindUniformSites(set<int> &sitesUniform) const;
-  void TrimFullyCompatibleSites(set<int> *pRemovedSet = NULL);
-  virtual void TrimNgbrDupCompSites(set<int> *pRemovedSet = NULL);
-  void TrimSubsumedRows();
-  bool IsRowSubsumedBy(int r1, int r2);
-  bool IsColSubsumedBy(int c1, int c2);
-  void FindSubsumedSites(set<int> &ssSites);
-
-  // Matrix property checking
-  bool IsColNonInformative(int c, int *singletonState);
-  bool IsColNonInformative(int c);
-  bool IsColTrivial(int c);
-  void GetTrivialSites(vector<int> &trivSites);
-  bool IsCompatible(int c1, int c2);
-  bool IsCompatibleRooted(int c1, int c2, int rallele1, int rallele2);
-  bool IsSiteCompatibleWithRegion(int s, int rc1, int rc2);
-  bool IsRegionFullyCompatible(int rc1, int rc2);
-  void GetGamates(int c1, int c2, bool &f00, bool &f01, bool &f10, bool &f11);
-  virtual bool IsColComplement(int c1, int c2);
-  virtual bool IsColDuplicate(int c1, int c2);
-  bool IsPerfectPhylogeny();
-  bool IsZeroColumn(int c);
-  bool IsAllColumnsUnique();
-  int GetZeroColNum();
-  void GetAllIncompatiblePairs(set<pair<int, int> > &incompatibles);
-  virtual int GetMajorityState(int site);
-  int GetMinorStateNum(int site, int &minorState) const;
-  void GetMinorStateRows(int site, int &minorState,
-                         set<int> &listRowsWMinor) const;
-  void GetRowsWithAllele(int site, int alleleState, set<int> &setRows) const;
-  static int GetTheOtherAllele(int allele);
-
-  // Construct interval-speceific equivalance row classes,
-  // i.e. sets of row indexes that are same
-  void BuildColEquivClasses();
-  void GetUniqueColsInRange(int c1, int c2, set<int> &setUniques);
-  bool IsSequencesMatch(int r1, int r2, vector<int> &seqColPos);
-  void GetSequencesDiffSites(int r1, int r2, set<int> &seqColDiffs) const;
-
-  // Ohter utilities
-  void ConstructConflictGraph(UnWeightedGraph &graph);
-  void ConflictGraphComponents(vector<BinaryMatrix> &listSubMatrix);
-  void ConfigZeroMajSeq(); // make majority elem all-0 for each position
-  void ConfigZeroAncesSeq(const vector<int> &seqAnces); // make the matrix s.t.
-                                                        // the ancestral state
-                                                        // is always 0 in matrix
-  void DumpConvGenotypes();
-  void GreedyRemoveIncompatSites(
-      BinaryMatrix &matReduced); // greedily remove incompatible sites (i.e.
-                                 // first remove site that is incompatible w/
-                                 // most sites and continue)
-  void CalcSFS(vector<double> &listSFSFrac) const;
-  int GetDiffSitesForTwoRows(int r1, int r2) const;
-  double CalcAvePairRowsDiff() const;
-  double CalcAvePairRowsDiffBetween(const set<int> &rowsSet1,
-                                    const set<int> &rowsSet2,
-                                    double &valMindiffOut) const;
-  void CollectAllPairwiseDiffs(const set<int> &rowsSet1,
-                               const set<int> &rowsSet2,
-                               vector<double> &listRowPairsDiff) const;
-
-  // Missing data utilities
-  bool IsColumnBinary(int c) const;
-  bool IsRowBinary(int r) const;
-  void TrimNonBinaryRows();
-  bool IsRowRangeBinary(int r, int left, int right);
-
-  // Lower/upper recombination bound utilities
-  int ComputeHKBound();
-  int ComputeFastHapBound();
-  int ComputeFastRecombUpperBound();
-  int ComputeMinRecombWeight(int rowIndex);
+	BinaryMatrix();
+	~BinaryMatrix();
+	BinaryMatrix(int nr, int nc);
+
+	// Support assignment/copy constructor
+	BinaryMatrix(const BinaryMatrix &rhs);
+	BinaryMatrix &operator=(const BinaryMatrix &rhs);
+
+	// Important interface functions we need
+	virtual bool IsDataValid(int val); // check to see if this data is good for this class
+									   // e.g. for genotype data, 0, 1, 2
+
+	// Matrix editing functions specific to Binary (i.e. haplotype) Matrix
+	void TrimDupSites(set<int> *pRemovedSites = NULL, bool fTrimSubsumed = false);
+	int FindDupRow();
+	void FindNonInformativeSites(set<int> &sitesNoinfo);
+	bool TrimNonInformativeSites(set<int> *pRemovedSet = NULL);
+	void TrimUniformSites(set<int> *pRemovedSet = NULL);
+	void FindUniformSites(set<int> &sitesUniform) const;
+	void TrimFullyCompatibleSites(set<int> *pRemovedSet = NULL);
+	virtual void TrimNgbrDupCompSites(set<int> *pRemovedSet = NULL);
+	void TrimSubsumedRows();
+	bool IsRowSubsumedBy(int r1, int r2);
+	bool IsColSubsumedBy(int c1, int c2);
+	void FindSubsumedSites(set<int> &ssSites);
+
+	// Matrix property checking
+	bool IsColNonInformative(int c, int *singletonState);
+	bool IsColNonInformative(int c);
+	bool IsColTrivial(int c);
+	void GetTrivialSites(vector<int> &trivSites);
+	bool IsCompatible(int c1, int c2);
+	bool IsCompatibleRooted(int c1, int c2, int rallele1, int rallele2);
+	bool IsSiteCompatibleWithRegion(int s, int rc1, int rc2);
+	bool IsRegionFullyCompatible(int rc1, int rc2);
+	void GetGamates(int c1, int c2, bool &f00, bool &f01, bool &f10, bool &f11);
+	virtual bool IsColComplement(int c1, int c2);
+	virtual bool IsColDuplicate(int c1, int c2);
+	bool IsPerfectPhylogeny();
+	bool IsZeroColumn(int c);
+	bool IsAllColumnsUnique();
+	int GetZeroColNum();
+	void GetAllIncompatiblePairs(set<pair<int, int>> &incompatibles);
+	virtual int GetMajorityState(int site);
+	int GetMinorStateNum(int site, int &minorState) const;
+	void GetMinorStateRows(int site, int &minorState, set<int> &listRowsWMinor) const;
+	void GetRowsWithAllele(int site, int alleleState, set<int> &setRows) const;
+	static int GetTheOtherAllele(int allele);
+
+	// Construct interval-specific equivalence row classes,
+	// i.e. sets of row indexes that are same
+	void BuildColEquivClasses();
+	void GetUniqueColsInRange(int c1, int c2, set<int> &setUniques);
+	bool IsSequencesMatch(int r1, int r2, vector<int> &seqColPos);
+	void GetSequencesDiffSites(int r1, int r2, set<int> &seqColDiffs) const;
+
+	// Other utilities
+	void ConstructConflictGraph(UnWeightedGraph &graph);
+	void ConflictGraphComponents(vector<BinaryMatrix> &listSubMatrix);
+	void ConfigZeroMajSeq();							  // make majority elem all-0 for each position
+	void ConfigZeroAncesSeq(const vector<int> &seqAnces); // make the matrix s.t. the ancestral state is always 0 in matrix
+	void DumpConvGenotypes();
+	void GreedyRemoveIncompatSites(BinaryMatrix &matReduced); // greedily remove incompatible sites (i.e. first remove site that is incompatible w/ most sites and continue)
+	void CalcSFS(vector<double> &listSFSFrac) const;
+	int GetDiffSitesForTwoRows(int r1, int r2) const;
+	double CalcAvePairRowsDiff() const;
+	double CalcAvePairRowsDiffBetween(const set<int> &rowsSet1, const set<int> &rowsSet2, double &valMindiffOut) const;
+	void CollectAllPairwiseDiffs(const set<int> &rowsSet1, const set<int> &rowsSet2, vector<double> &listRowPairsDiff) const;
+
+	// Missing data utilities
+	bool IsColumnBinary(int c) const;
+	bool IsRowBinary(int r) const;
+	void TrimNonBinaryRows();
+	bool IsRowRangeBinary(int r, int left, int right);
+
+	// Lower/upper recombination bound utilities
+	int ComputeHKBound();
+	int ComputeFastHapBound();
+	int ComputeFastRecombUpperBound();
+	int ComputeMinRecombWeight(int rowIndex);
 
 private:
-  // Interval-based equivlance classes
-  COLUMN_EQUIV_CLASS setColEquiv;
+	// Interval-based equivalence classes
+	COLUMN_EQUIV_CLASS setColEquiv;
 };
 
 // some other useful functions
-// this structure defines what rows to keep and what not to, and for each
-// removed row, which row it comes from (i.e. duplicate) NOTE: we are dealing
-// with the current rows only. THat is, the removal may be in stages in each
-// stage, we only consider what we have so far
-typedef struct {
-  set<int> rowsRemoved;
-  vector<pair<int, int> > pairsRmKeepRows;
+// this structure defines what rows to keep and what not to, and for each removed row, which row it comes from (i.e. duplicate)
+// NOTE: we are dealing with the current rows only. That is, the removal may be in stages;
+// in each stage, we only consider what we have so far
+typedef struct
+{
+	set<int> rowsRemoved;
+	vector<pair<int, int>> pairsRmKeepRows;
 } REMOVED_ROWS_INFO;
 
-void GetNoninformativeRowsInMat(const BinaryMatrix &mat, set<int> &trimedRows,
-                                vector<REMOVED_ROWS_INFO> &trimedRowInfo,
-                                set<int> &trimedCols, BinaryMatrix &matUpdated,
-                                bool fRmDup = false);
-void SplitMatrixIntoMaximalFullyCompatRegs(
-    const BinaryMatrix &mat, vector<pair<int, int> > &listFullyCompatRegs);
+void GetNoninformativeRowsInMat(const BinaryMatrix &mat, set<int> &trimedRows, vector<REMOVED_ROWS_INFO> &trimedRowInfo, set<int> &trimedCols,
+								BinaryMatrix &matUpdated, bool fRmDup = false);
+void SplitMatrixIntoMaximalFullyCompatRegs(const BinaryMatrix &mat, vector<pair<int, int>> &listFullyCompatRegs);
 
-void ReadSitePosFromFirstRowInFile(const char *filename, int numSites,
-                                   vector<double> &listSitePos);
+void ReadSitePosFromFirstRowInFile(const char *filename, int numSites, vector<double> &listSitePos);
 
-#endif // BINARY_MATRIX_H
+#endif //BINARY_MATRIX_H
diff --git a/trisicell/external/scistree/BioSequenceMatrix.cpp b/trisicell/external/scistree/BioSequenceMatrix.cpp
index 7468adb..906c517 100644
--- a/trisicell/external/scistree/BioSequenceMatrix.cpp
+++ b/trisicell/external/scistree/BioSequenceMatrix.cpp
@@ -1,10 +1,10 @@
 #include "BioSequenceMatrix.h"
-#include "Utils2.h"
 #include <cmath>
-#include <cstdio>
 #include <cstdlib>
+#include <cstdio>
 #include <cstring>
 #include <fstream>
+#include "Utils2.h"
 
 // ***************************************************************************
 // Define a reusable binary matrix class
@@ -16,446 +16,516 @@ BioSequenceMatrix :: BioSequenceMatrix()
 }
 #endif
 
-BioSequenceMatrix ::~BioSequenceMatrix() { Clear(); }
+BioSequenceMatrix ::~BioSequenceMatrix()
+{
+	Clear();
+}
 
-void BioSequenceMatrix ::AppendRow(const vector<int> &row) {
-  // Check to see if this is the first row, if so OK
-  if (nCols == 0 && GetRowNum() == 0) {
-    nCols = row.size();
-  }
+void BioSequenceMatrix ::AppendRow(const vector<int> &row)
+{
+	// Check to see if this is the first row, if so OK
+	if (nCols == 0 && GetRowNum() == 0)
+	{
+		nCols = row.size();
+	}
 
-  if (row.size() != (unsigned int)nCols) {
-    DEBUG("WRONG row width in AddRow");
-    return;
-  }
+	if (row.size() != (unsigned int)nCols)
+	{
+		DEBUG("WRONG row width in AddRow");
+		return;
+	}
 
-  int *buf = new int[nCols];
-  for (int i = 0; i < nCols; ++i) {
-    buf[i] = row[i];
-  }
-  rowsArray.push_back(buf);
+	int *buf = new int[nCols];
+	for (int i = 0; i < nCols; ++i)
+	{
+		buf[i] = row[i];
+	}
+	rowsArray.push_back(buf);
 }
 
-void BioSequenceMatrix ::AppendSetOfRows(const set<SEQUENCE> &rows) {
-  for (set<SEQUENCE>::iterator it = rows.begin(); it != rows.end(); ++it) {
-    AppendRow(*it);
-  }
+void BioSequenceMatrix ::AppendSetOfRows(const set<SEQUENCE> &rows)
+{
+	for (set<SEQUENCE>::iterator it = rows.begin(); it != rows.end(); ++it)
+	{
+		AppendRow(*it);
+	}
 }
 
-void BioSequenceMatrix ::AppendRows(const vector<SEQUENCE> &rows) {
-  for (unsigned int i = 0; i < rows.size(); ++i) {
-    AppendRow(rows[i]);
-  }
+void BioSequenceMatrix ::AppendRows(const vector<SEQUENCE> &rows)
+{
+	for (unsigned int i = 0; i < rows.size(); ++i)
+	{
+		AppendRow(rows[i]);
+	}
 }
 
-// This function removes a set of columns that are specified in the set as
-// duplicateSites
-void BioSequenceMatrix ::InsertColumns(const vector<SEQUENCE> &sitesValue,
-                                       const vector<int> &sitesPos) {
-  // we require the site contains the same number of values as rows
-  YW_ASSERT_INFO(sitesPos.size() == (unsigned int)sitesValue.size(),
-                 "Wrong vector size.");
-  YW_ASSERT_INFO(sitesValue.size() > 0, "Can not be empty.");
-  YW_ASSERT_INFO(sitesValue[0].size() == (unsigned int)GetRowNum(),
-                 "Size mismatch.");
-
-  int totalLen = GetColNum() + sitesPos.size();
-
-  // First we need to calculate where to put these new sites
-  // remember the passed-in values are BEFORE insertion. For example, when we
-  // say we want to insert sites a,b at location 0, 2, when mean we want to put
-  // a at 0 (this pushes the value forward
-  vector<int> poses;
-  int offset = 0;
-  for (unsigned int i = 0; i < sitesPos.size(); ++i) {
-    // Treat out of ranges as close to real
-    int realPos = sitesPos[i];
-    if (realPos < 0) {
-      realPos = 0;
-    } else if (realPos > GetColNum()) {
-      realPos = GetColNum();
-    }
-    poses.push_back(realPos + offset);
-    offset++;
-  }
-  // cout << "poses = ";
-  // DumpIntVec( poses );
-  // now we create  a new matrix with different size
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    int *buf = new int[totalLen];
-    int origPos = 0;
-    int pos = 0;
-    for (unsigned int j = 0; j < poses.size(); ++j) {
-      // cout << "1. origPos = " << origPos << ", pos = " << pos << endl;
-      for (; pos < poses[j]; ++pos) {
-        buf[pos] = rowsArray[i][origPos++];
-      }
-      // Now add poses[j]
-      // cout << "Before assign site,  origPos = " << origPos << ", pos = " <<
-      // pos << endl;
-      buf[pos++] = sitesValue[j][i];
-    }
-    // We finish any leftover
-    for (; pos < totalLen; ++pos, ++origPos) {
-      // cout << "2. origPos = " << origPos << ", pos = " << pos << endl;
-      buf[pos] = rowsArray[i][origPos];
-    }
+// This function removes a set of columns that are specified in the set as duplicateSites
+void BioSequenceMatrix ::InsertColumns(const vector<SEQUENCE> &sitesValue, const vector<int> &sitesPos)
+{
+	// we require the site contains the same number of values as rows
+	YW_ASSERT_INFO(sitesPos.size() == (unsigned int)sitesValue.size(), "Wrong vector size.");
+	YW_ASSERT_INFO(sitesValue.size() > 0, "Can not be empty.");
+	YW_ASSERT_INFO(sitesValue[0].size() == (unsigned int)GetRowNum(), "Size mismatch.");
+
+	int totalLen = GetColNum() + sitesPos.size();
+
+	// First we need to calculate where to put these new sites
+	// remember the passed-in values are BEFORE insertion. For example, when we say we want to insert sites a,b
+	// at location 0, 2, when mean we want to put a at 0 (this pushes the value forward
+	vector<int> poses;
+	int offset = 0;
+	for (unsigned int i = 0; i < sitesPos.size(); ++i)
+	{
+		// Treat out of ranges as close to real
+		int realPos = sitesPos[i];
+		if (realPos < 0)
+		{
+			realPos = 0;
+		}
+		else if (realPos > GetColNum())
+		{
+			realPos = GetColNum();
+		}
+		poses.push_back(realPos + offset);
+		offset++;
+	}
+	//cout << "poses = ";
+	//DumpIntVec( poses );
+	// now we create  a new matrix with different size
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		int *buf = new int[totalLen];
+		int origPos = 0;
+		int pos = 0;
+		for (unsigned int j = 0; j < poses.size(); ++j)
+		{
+			//cout << "1. origPos = " << origPos << ", pos = " << pos << endl;
+			for (; pos < poses[j]; ++pos)
+			{
+				buf[pos] = rowsArray[i][origPos++];
+			}
+			// Now add poses[j]
+			//cout << "Before assign site,  origPos = " << origPos << ", pos = " << pos << endl;
+			buf[pos++] = sitesValue[j][i];
+		}
+		// We finish any leftover
+		for (; pos < totalLen; ++pos, ++origPos)
+		{
+			//cout << "2. origPos = " << origPos << ", pos = " << pos << endl;
+			buf[pos] = rowsArray[i][origPos];
+		}
 
-    // now we free the memory of old buffer
-    delete[] rowsArray[i];
-    rowsArray[i] = buf;
-  }
+		// now we free the memory of old buffer
+		delete[] rowsArray[i];
+		rowsArray[i] = buf;
+	}
 
-  nCols = totalLen;
+	nCols = totalLen;
 }
 
-void BioSequenceMatrix ::AppendMatrixByCol(
-    const BioSequenceMatrix &appendedMat) {
-  // Append the matrix by putting the matrix to the right
-  // Make sure the row matches
-  YW_ASSERT_INFO(appendedMat.IsEmpty() == false,
-                 "For now, do not allow appending empty matrix.");
-  YW_ASSERT_INFO(IsEmpty() || GetRowNum() == appendedMat.GetRowNum(),
-                 "Can not append such matrix");
-
-  // Figure out the size
-  vector<int *> rowsArrayNew; // array of rows
-  int rowNum, colNum;
-  if (IsEmpty() == false) {
-    rowNum = GetRowNum();
-    colNum = GetColNum();
-  } else {
-    // Use the new matrix's value
-    rowNum = appendedMat.GetRowNum();
-    colNum = 0;
-  }
-  int numSitesNew = colNum + appendedMat.GetColNum();
-  // Allocate space
-  for (int r = 0; r < rowNum; ++r) {
-    int *buf = new int[numSitesNew];
-    rowsArrayNew.push_back(buf);
-  }
-  // Now copy the stuff in
-  for (int r = 0; r < rowNum; ++r) {
-    for (int c = 0; c < colNum; ++c) {
-      rowsArrayNew[r][c] = rowsArray[r][c];
-    }
-    for (int c = 0; c < appendedMat.GetColNum(); ++c) {
-      rowsArrayNew[r][c + colNum] = appendedMat(r, c);
-    }
-  }
+void BioSequenceMatrix ::AppendMatrixByCol(const BioSequenceMatrix &appendedMat)
+{
+	// Append the matrix by putting the matrix to the right
+	// Make sure the row matches
+	YW_ASSERT_INFO(appendedMat.IsEmpty() == false, "For now, do not allow appending empty matrix.");
+	YW_ASSERT_INFO(IsEmpty() || GetRowNum() == appendedMat.GetRowNum(), "Can not append such matrix");
+
+	// Figure out the size
+	vector<int *> rowsArrayNew; // array of rows
+	int rowNum, colNum;
+	if (IsEmpty() == false)
+	{
+		rowNum = GetRowNum();
+		colNum = GetColNum();
+	}
+	else
+	{
+		// Use the new matrix's value
+		rowNum = appendedMat.GetRowNum();
+		colNum = 0;
+	}
+	int numSitesNew = colNum + appendedMat.GetColNum();
+	// Allocate space
+	for (int r = 0; r < rowNum; ++r)
+	{
+		int *buf = new int[numSitesNew];
+		rowsArrayNew.push_back(buf);
+	}
+	// Now copy the stuff in
+	for (int r = 0; r < rowNum; ++r)
+	{
+		for (int c = 0; c < colNum; ++c)
+		{
+			rowsArrayNew[r][c] = rowsArray[r][c];
+		}
+		for (int c = 0; c < appendedMat.GetColNum(); ++c)
+		{
+			rowsArrayNew[r][c + colNum] = appendedMat(r, c);
+		}
+	}
 
-  // Remove the old ones
-  Clear();
-  // Set to the new one
-  nCols = numSitesNew;
-  rowsArray = rowsArrayNew;
+	// Remove the old ones
+	Clear();
+	// Set to the new one
+	nCols = numSitesNew;
+	rowsArray = rowsArrayNew;
 }
 
-void BioSequenceMatrix ::AppendMatrixByRow(
-    const BioSequenceMatrix &appendedMat) {
-  // Append the matrix by putting the matrix to the right
-  // Make sure the row matches
-  YW_ASSERT_INFO(appendedMat.IsEmpty() == false,
-                 "For now, do not allow appending empty matrix.");
-  YW_ASSERT_INFO(IsEmpty() || GetColNum() == appendedMat.GetColNum(),
-                 "Can not append such matrix");
-
-  // Now copy the stuff in
-  for (int r = 0; r < appendedMat.GetRowNum(); ++r) {
-    SEQUENCE seq;
-    appendedMat.GetRow(r, seq);
-    this->AppendRow(seq);
-  }
+void BioSequenceMatrix ::AppendMatrixByRow(const BioSequenceMatrix &appendedMat)
+{
+	// Append the matrix by putting the matrix to the right
+	// Make sure the row matches
+	YW_ASSERT_INFO(appendedMat.IsEmpty() == false, "For now, do not allow appending empty matrix.");
+	YW_ASSERT_INFO(IsEmpty() || GetColNum() == appendedMat.GetColNum(), "Can not append such matrix");
+
+	// Now copy the stuff in
+	for (int r = 0; r < appendedMat.GetRowNum(); ++r)
+	{
+		SEQUENCE seq;
+		appendedMat.GetRow(r, seq);
+		this->AppendRow(seq);
+	}
 }
 
-void BioSequenceMatrix ::SetRow(int r, const vector<int> &valNew) {
-  if (valNew.size() != (unsigned int)nCols) {
-    DEBUG("WRONG row width in SetRow");
-    return;
-  }
-  for (int i = 0; i < nCols; ++i) {
-    rowsArray[r][i] = valNew[i];
-  }
+void BioSequenceMatrix ::SetRow(int r, const vector<int> &valNew)
+{
+	if (valNew.size() != (unsigned int)nCols)
+	{
+		DEBUG("WRONG row width in SetRow");
+		return;
+	}
+	for (int i = 0; i < nCols; ++i)
+	{
+		rowsArray[r][i] = valNew[i];
+	}
 }
 
-void BioSequenceMatrix ::SetCol(int c, const vector<int> &valNew) {
-  if (valNew.size() != (unsigned int)GetRowNum()) {
-    DEBUG("WRONG row width in SetRow");
-    return;
-  }
-  for (int i = 0; i < GetRowNum(); ++i) {
-    rowsArray[i][c] = valNew[i];
-  }
+void BioSequenceMatrix ::SetCol(int c, const vector<int> &valNew)
+{
+	if (valNew.size() != (unsigned int)GetRowNum())
+	{
+		DEBUG("WRONG row width in SetRow");
+		return;
+	}
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		rowsArray[i][c] = valNew[i];
+	}
 }
 
-void BioSequenceMatrix ::Clear() {
-  // Need to free up data if needed
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    delete[] rowsArray[i];
-  }
-  rowsArray.clear();
-  nCols = 0;
+void BioSequenceMatrix ::Clear()
+{
+	// Need to free up data if needed
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		delete[] rowsArray[i];
+	}
+	rowsArray.clear();
+	nCols = 0;
 }
 
-void BioSequenceMatrix ::Copy(const BioSequenceMatrix &rhs) {
-  Clear(); // 012713: it seems we should clear this one first
-  for (unsigned int i = 0; i < rhs.rowsArray.size(); ++i) {
-    int *buf = new int[rhs.nCols];
-    for (int j = 0; j < rhs.nCols; ++j) {
-      buf[j] = rhs.rowsArray[i][j];
-    }
-    rowsArray.push_back(buf);
-  }
-  nCols = rhs.nCols;
+void BioSequenceMatrix ::Copy(const BioSequenceMatrix &rhs)
+{
+	Clear(); // 012713: it seems we should clear this one first
+	for (unsigned int i = 0; i < rhs.rowsArray.size(); ++i)
+	{
+		int *buf = new int[rhs.nCols];
+		for (int j = 0; j < rhs.nCols; ++j)
+		{
+			buf[j] = rhs.rowsArray[i][j];
+		}
+		rowsArray.push_back(buf);
+	}
+	nCols = rhs.nCols;
 }
 
-void BioSequenceMatrix ::RemoveRow(int rowIndex) {
-  if ((unsigned int)rowIndex >= rowsArray.size()) {
-    return;
-  }
-
-  int nPos = -1;
-  for (vector<int *>::iterator it = rowsArray.begin(); it != rowsArray.end();
-       ++it) {
-    nPos++;
-    if (nPos == rowIndex) {
-      delete[] * it;
-      rowsArray.erase(it);
-      return;
-    }
-  }
-  DEBUG("Something very wrong inside BioSequenceMatrix :: RemoveRow");
+void BioSequenceMatrix ::RemoveRow(int rowIndex)
+{
+	if ((unsigned int)rowIndex >= rowsArray.size())
+	{
+		return;
+	}
+
+	int nPos = -1;
+	for (vector<int *>::iterator it = rowsArray.begin(); it != rowsArray.end(); ++it)
+	{
+		nPos++;
+		if (nPos == rowIndex)
+		{
+			delete[] * it;
+			rowsArray.erase(it);
+			return;
+		}
+	}
+	DEBUG("Something very wrong inside BioSequenceMatrix :: RemoveRow");
 }
 
 // Consolidate rows in matrix
-void BioSequenceMatrix::TrimDupRows(set<int> *pTrimedRows,
-                                    vector<pair<int, int> > *pTrimRowInfo) {
-
-  set<int> setOfDuplicates;
-  vector<pair<int, int> > listRowsDeletedWithExistingPairs;
-  setOfDuplicates.clear();
-  unsigned int r1, r2;
-  int c;
-
-  bool res = false; // we stop unless we find some duplicate rows and/or
-                    // non-informat site
-
-  for (r1 = 0; r1 < rowsArray.size(); ++r1) {
-    for (r2 = r1 + 1; r2 < rowsArray.size(); ++r2) {
-      /*
-              Now test whether row 1 and row 2 are the same
-      */
-      bool fSame = true;
-      for (c = 0; c < nCols; ++c) {
-        if (rowsArray[r1][c] != rowsArray[r2][c]) {
-          fSame = false;
-          break;
-        }
-      }
-      if (fSame) {
-        if (setOfDuplicates.find(r2) == setOfDuplicates.end()) {
-          pair<int, int> pp;
-          pp.first = r2; // first item is which row is removed
-          pp.second =
-              r1; // second item is which row is the source (to be kepted)
-          listRowsDeletedWithExistingPairs.push_back(pp);
-        }
-
-        // cout << "row " << r2 << " is duplicate." << endl;
-        setOfDuplicates.insert(r2);
-      }
-    }
-  }
-  /*
-          Now we remove all duplicate rows
-  */
-  if (setOfDuplicates.size() > 0) {
-    res = true;
-    RemoveRows(setOfDuplicates);
-  }
-  if (pTrimedRows != NULL) {
-    *pTrimedRows = setOfDuplicates;
-  }
-  if (pTrimRowInfo != NULL) {
-    *pTrimRowInfo = listRowsDeletedWithExistingPairs;
-  }
-
-  return;
+void BioSequenceMatrix::TrimDupRows(set<int> *pTrimedRows, vector<pair<int, int>> *pTrimRowInfo)
+{
+
+	set<int> setOfDuplicates;
+	vector<pair<int, int>> listRowsDeletedWithExistingPairs;
+	setOfDuplicates.clear();
+	unsigned int r1, r2;
+	int c;
+
+	bool res = false; // we stop unless we find some duplicate rows and/or non-informat site
+
+	for (r1 = 0; r1 < rowsArray.size(); ++r1)
+	{
+		for (r2 = r1 + 1; r2 < rowsArray.size(); ++r2)
+		{
+			/*
+				Now test whether row 1 and row 2 are the same
+			*/
+			bool fSame = true;
+			for (c = 0; c < nCols; ++c)
+			{
+				if (rowsArray[r1][c] != rowsArray[r2][c])
+				{
+					fSame = false;
+					break;
+				}
+			}
+			if (fSame)
+			{
+				if (setOfDuplicates.find(r2) == setOfDuplicates.end())
+				{
+					pair<int, int> pp;
+					pp.first = r2;	// first item is which row is removed
+					pp.second = r1; // second item is which row is the source (to be kepted)
+					listRowsDeletedWithExistingPairs.push_back(pp);
+				}
+
+				//cout << "row " << r2 << " is duplicate." << endl;
+				setOfDuplicates.insert(r2);
+			}
+		}
+	}
+	/*
+		Now we remove all duplicate rows
+	*/
+	if (setOfDuplicates.size() > 0)
+	{
+		res = true;
+		RemoveRows(setOfDuplicates);
+	}
+	if (pTrimedRows != NULL)
+	{
+		*pTrimedRows = setOfDuplicates;
+	}
+	if (pTrimRowInfo != NULL)
+	{
+		*pTrimRowInfo = listRowsDeletedWithExistingPairs;
+	}
+
+	return;
 }
 
-void BioSequenceMatrix ::DumpRowMultiplicity() const {
-  // This function dump out duplicate row information
-  map<SEQUENCE, int> mapRowMultiplicity;
-  for (int r = 0; r < GetRowNum(); ++r) {
-    SEQUENCE row;
-    GetRow(r, row);
-    if (mapRowMultiplicity.find(row) == mapRowMultiplicity.end()) {
-      mapRowMultiplicity.insert(map<SEQUENCE, int>::value_type(row, 1));
-    } else {
-      mapRowMultiplicity[row]++;
-    }
-  }
-  // Now dump out info
-  cout << "In this matrix, the multiplicity of rows is: \n";
-  for (map<SEQUENCE, int>::iterator it = mapRowMultiplicity.begin();
-       it != mapRowMultiplicity.end(); ++it) {
-    cout << "seq = ";
-    DumpSequence(it->first);
-    cout << ", multiplicty = ";
-    cout << it->second << endl;
-  }
+void BioSequenceMatrix ::DumpRowMultiplicity() const
+{
+	// This function dump out duplicate row information
+	map<SEQUENCE, int> mapRowMultiplicity;
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		SEQUENCE row;
+		GetRow(r, row);
+		if (mapRowMultiplicity.find(row) == mapRowMultiplicity.end())
+		{
+			mapRowMultiplicity.insert(map<SEQUENCE, int>::value_type(row, 1));
+		}
+		else
+		{
+			mapRowMultiplicity[row]++;
+		}
+	}
+	// Now dump out info
+	cout << "In this matrix, the multiplicity of rows is: \n";
+	for (map<SEQUENCE, int>::iterator it = mapRowMultiplicity.begin(); it != mapRowMultiplicity.end(); ++it)
+	{
+		cout << "seq = ";
+		DumpSequence(it->first);
+		cout << ", multiplicty = ";
+		cout << it->second << endl;
+	}
 }
 
-void BioSequenceMatrix ::GetColMultiplicityMap(
-    vector<int> &listColMulti) const {
-  // for each col (site), find out the number of duplicate each site has (that
-  // is, listMulti[i] = # of sites with the same column)
-  listColMulti.clear();
-  listColMulti.resize(GetColNum());
-  map<SEQUENCE, set<int> > mapColMulti;
-  for (int c = 0; c < GetColNum(); ++c) {
-    SEQUENCE col;
-    GetCol(c, col);
-    mapColMulti[col].insert(c);
-  }
-  for (map<SEQUENCE, set<int> >::iterator it = mapColMulti.begin();
-       it != mapColMulti.end(); ++it) {
-    for (set<int>::iterator it2 = it->second.begin(); it2 != it->second.end();
-         ++it2) {
-      listColMulti[*it2] = it->second.size();
-    }
-  }
+void BioSequenceMatrix ::GetColMultiplicityMap(vector<int> &listColMulti) const
+{
+	// for each col (site), find out the number of duplicate each site has (that is, listMulti[i] = # of sites with the same column)
+	listColMulti.clear();
+	listColMulti.resize(GetColNum());
+	map<SEQUENCE, set<int>> mapColMulti;
+	for (int c = 0; c < GetColNum(); ++c)
+	{
+		SEQUENCE col;
+		GetCol(c, col);
+		mapColMulti[col].insert(c);
+	}
+	for (map<SEQUENCE, set<int>>::iterator it = mapColMulti.begin(); it != mapColMulti.end(); ++it)
+	{
+		for (set<int>::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
+		{
+			listColMulti[*it2] = it->second.size();
+		}
+	}
 }
 
-int BioSequenceMatrix ::GetMultiplictyForRow(int r) const {
-  SEQUENCE seqRow;
-  GetRow(r, seqRow);
-  return GetMultiplictyForRow(seqRow);
+int BioSequenceMatrix ::GetMultiplictyForRow(int r) const
+{
+	SEQUENCE seqRow;
+	GetRow(r, seqRow);
+	return GetMultiplictyForRow(seqRow);
 }
 
-int BioSequenceMatrix ::GetMultiplictyForRow(const SEQUENCE &seqRow) const {
-  int res = 0;
-  for (int i = 0; i < GetRowNum(); ++i) {
-    SEQUENCE curRow;
-    GetRow(i, curRow);
-    if (curRow == seqRow) {
-      ++res;
-    }
-  }
-  return res;
+int BioSequenceMatrix ::GetMultiplictyForRow(const SEQUENCE &seqRow) const
+{
+	int res = 0;
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		SEQUENCE curRow;
+		GetRow(i, curRow);
+		if (curRow == seqRow)
+		{
+			++res;
+		}
+	}
+	return res;
 }
 
-int BioSequenceMatrix ::GetMultiplictyForRow(const SEQUENCE &seqRow,
-                                             set<int> &identRows) const {
-  identRows.clear();
-  int res = 0;
-  for (int i = 0; i < GetRowNum(); ++i) {
-    SEQUENCE curRow;
-    GetRow(i, curRow);
-    if (curRow == seqRow) {
-      identRows.insert(i);
-      ++res;
-    }
-  }
-  // YW_ASSERT_INFO( res > 0, "Must appear at least once." );
-  return res;
+int BioSequenceMatrix ::GetMultiplictyForRow(const SEQUENCE &seqRow, set<int> &identRows) const
+{
+	identRows.clear();
+	int res = 0;
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		SEQUENCE curRow;
+		GetRow(i, curRow);
+		if (curRow == seqRow)
+		{
+			identRows.insert(i);
+			++res;
+		}
+	}
+	//YW_ASSERT_INFO( res > 0, "Must appear at least once." );
+	return res;
 }
 
-int BioSequenceMatrix ::GetMultiplictyForRowIV(int r, int left,
-                                               int right) const {
-  SEQUENCE row;
-  GetRow(r, row);
-  SEQUENCE rowIV;
-  GetSeqInterval(row, rowIV, left, right);
-  int res = 0;
-  for (int i = 0; i < GetRowNum(); ++i) {
-    SEQUENCE curRow;
-    GetRow(i, curRow);
-    SEQUENCE rowIV1;
-    GetSeqInterval(curRow, rowIV1, left, right);
-
-    if (rowIV1 == rowIV) {
-      ++res;
-    }
-  }
-  return res;
+int BioSequenceMatrix ::GetMultiplictyForRowIV(int r, int left, int right) const
+{
+	SEQUENCE row;
+	GetRow(r, row);
+	SEQUENCE rowIV;
+	GetSeqInterval(row, rowIV, left, right);
+	int res = 0;
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		SEQUENCE curRow;
+		GetRow(i, curRow);
+		SEQUENCE rowIV1;
+		GetSeqInterval(curRow, rowIV1, left, right);
+
+		if (rowIV1 == rowIV)
+		{
+			++res;
+		}
+	}
+	return res;
 }
 
-bool BioSequenceMatrix ::ReadFromFile(ifstream &inFile, bool fSkipFirstLine) {
-  bool res = true;
-
-  // Now, we first check one row to find out how many sites
-  // first read in the matrix name first
-  const int BUF_SZ = MAX_SITE_NUM * sizeof(int);
-  char buf[BUF_SZ]; // assume maximum sites allowed are 4096
-  if (fSkipFirstLine == true) {
-    inFile.getline(buf, BUF_SZ);
-    //	cout << "Matrix name is " << buf << endl;
-  }
-
-  int rowLength = 0;
-  while (!inFile.eof()) {
-    inFile.getline(buf, BUF_SZ);
-    DEBUG("strlen of buf ");
-    DEBUG(strlen(buf));
-    DEBUG("\n");
-    DEBUG("buffer is:");
-    DEBUG(buf);
-    DEBUG("\n");
-
-    // ignore any ine starting with #
-    if (buf[0] == '#') {
-      continue;
-    }
+bool BioSequenceMatrix ::ReadFromFile(ifstream &inFile, bool fSkipFirstLine)
+{
+	bool res = true;
 
-    int curRowLen = 0;
-    curRowLen = strlen(buf);
-    // but we need to check to make sure there is no garbage character at the
-    // end
-    for (int i = curRowLen - 1;;) {
-      if (i > 0 && buf[i] != '0' && buf[i] != '1' && buf[i] != '2' &&
-          buf[i] != '*' && buf[i] != '?') {
-        i--;
-        curRowLen--;
-      } else {
-        break;
-      }
-    }
+	// Now, we first check one row to find out how many sites
+	// first read in the matrix name first
+	const int BUF_SZ = MAX_SITE_NUM * sizeof(int);
+	char buf[BUF_SZ]; // assume maximum sites allowed are 4096
+	if (fSkipFirstLine == true)
+	{
+		inFile.getline(buf, BUF_SZ);
+		//	cout << "Matrix name is " << buf << endl;
+	}
 
-    if (rowLength == 0) {
-      rowLength = curRowLen;
-    }
-    if (rowLength != curRowLen) {
-      // for some reason, we are getting a smaller size
-      // simplely terminate here
-      // DEBUG("Warning: one row of fle seems to have fewer data bits.\n");
-      // res = false;
-      break;
-    }
-    int *pRow = new int[rowLength];
-    for (int i = 0; i < rowLength; ++i) {
-      if (buf[i] == '1') {
-        pRow[i] = 1;
-      } else if (buf[i] == '0') {
-        pRow[i] = 0;
-      } else if (buf[i] == '2') {
-        pRow[i] = 2;
-      } else if (buf[i] == '*' || buf[i] == '?') {
-        pRow[i] = MISSING_VALUE_BIT;
-      } else {
-        YW_ASSERT_INFO(false, "Un-recognized characters in input.");
-        exit(1);
-      }
-    }
-    // Now put it into a list
-    rowsArray.push_back(pRow);
-  }
+	int rowLength = 0;
+	while (!inFile.eof())
+	{
+		inFile.getline(buf, BUF_SZ);
+		DEBUG("strlen of buf ");
+		DEBUG(strlen(buf));
+		DEBUG("\n");
+		DEBUG("buffer is:");
+		DEBUG(buf);
+		DEBUG("\n");
+
+		// ignore any ine starting with #
+		if (buf[0] == '#')
+		{
+			continue;
+		}
 
-  // Now set return value
-  nCols = rowLength;
+		int curRowLen = 0;
+		curRowLen = strlen(buf);
+		// but we need to check to make sure there is no garbage character at the end
+		for (int i = curRowLen - 1;;)
+		{
+			if (i > 0 && buf[i] != '0' && buf[i] != '1' && buf[i] != '2' && buf[i] != '*' && buf[i] != '?')
+			{
+				i--;
+				curRowLen--;
+			}
+			else
+			{
+				break;
+			}
+		}
 
-  return res;
+		if (rowLength == 0)
+		{
+			rowLength = curRowLen;
+		}
+		if (rowLength != curRowLen)
+		{
+			// for some reason, we are getting a smaller size
+			// simplely terminate here
+			//DEBUG("Warning: one row of fle seems to have fewer data bits.\n");
+			//res = false;
+			break;
+		}
+		int *pRow = new int[rowLength];
+		for (int i = 0; i < rowLength; ++i)
+		{
+			if (buf[i] == '1')
+			{
+				pRow[i] = 1;
+			}
+			else if (buf[i] == '0')
+			{
+				pRow[i] = 0;
+			}
+			else if (buf[i] == '2')
+			{
+				pRow[i] = 2;
+			}
+			else if (buf[i] == '*' || buf[i] == '?')
+			{
+				pRow[i] = MISSING_VALUE_BIT;
+			}
+			else
+			{
+				YW_ASSERT_INFO(false, "Un-recognized characters in input.");
+				exit(1);
+			}
+		}
+		// Now put it into a list
+		rowsArray.push_back(pRow);
+	}
+
+	// Now set return value
+	nCols = rowLength;
+
+	return res;
 }
 
 #if 0
@@ -551,29 +621,36 @@ bool BioSequenceMatrix :: ReadFromFilePartial( ifstream &inFile, bool fSkipFirst
 #endif
 
 // Dump the content of matrix
-void BioSequenceMatrix ::Dump() const {
-  cout << "positions: Matrix has  ";
-  cout << nCols;
-  cout << " columns and ";
-  cout << rowsArray.size();
-  cout << " rows.\n";
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    for (int j = 0; j < nCols; ++j) {
-      if (rowsArray[i][j] != MISSING_VALUE_BIT) {
-        cout << rowsArray[i][j];
-      } else {
-        cout << "*";
-      }
-    }
-    cout << endl;
-  }
+void BioSequenceMatrix ::Dump() const
+{
+	cout << "positions: Matrix has  ";
+	cout << nCols;
+	cout << " columns and ";
+	cout << rowsArray.size();
+	cout << " rows.\n";
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		for (int j = 0; j < nCols; ++j)
+		{
+			if (rowsArray[i][j] != MISSING_VALUE_BIT)
+			{
+				cout << rowsArray[i][j];
+			}
+			else
+			{
+				cout << "*";
+			}
+		}
+		cout << endl;
+	}
 }
 
-void BioSequenceMatrix ::OutputToFile(const char *fileName) const {
-  //
-  ofstream outFile;
-  outFile.open(fileName);
-  OutputToFile(outFile);
+void BioSequenceMatrix ::OutputToFile(const char *fileName) const
+{
+	//
+	ofstream outFile;
+	outFile.open(fileName);
+	OutputToFile(outFile);
 #if 0
 	outFile << "Matrix has  ";
 	outFile << nCols;
@@ -596,323 +673,372 @@ void BioSequenceMatrix ::OutputToFile(const char *fileName) const {
 		outFile << endl;
 	}
 #endif
-  outFile.close();
+	outFile.close();
 }
 
-void BioSequenceMatrix ::OutputToFile(ofstream &outFile) const {
-  //
-  // ofstream outFile;
-  // outFile.open (fileName);
-  outFile << "Matrix has  ";
-  outFile << nCols;
-  outFile << " columns and ";
-  outFile << rowsArray.size();
-  outFile << " rows.\n";
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    for (int j = 0; j < nCols; ++j) {
-      if (rowsArray[i][j] != MISSING_VALUE_BIT) {
-        outFile << rowsArray[i][j];
-      } else {
-        outFile << "*";
-      }
-    }
-    outFile << endl;
-  }
-  // outFile.close();
+void BioSequenceMatrix ::OutputToFile(ofstream &outFile) const
+{
+	//
+	//ofstream outFile;
+	//outFile.open (fileName);
+	outFile << "Matrix has  ";
+	outFile << nCols;
+	outFile << " columns and ";
+	outFile << rowsArray.size();
+	outFile << " rows.\n";
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		for (int j = 0; j < nCols; ++j)
+		{
+			if (rowsArray[i][j] != MISSING_VALUE_BIT)
+			{
+				outFile << rowsArray[i][j];
+			}
+			else
+			{
+				outFile << "*";
+			}
+		}
+		outFile << endl;
+	}
+	//outFile.close();
 }
 
-void BioSequenceMatrix ::ExchangeColumns(int c1, int c2) {
-  // This function exchanges two columns in this matrix
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    int tmp = rowsArray[i][c1];
-    rowsArray[i][c1] = rowsArray[i][c2];
-    rowsArray[i][c2] = tmp;
-  }
+void BioSequenceMatrix ::ExchangeColumns(int c1, int c2)
+{
+	// This function exchanges two columns in this matrix
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		int tmp = rowsArray[i][c1];
+		rowsArray[i][c1] = rowsArray[i][c2];
+		rowsArray[i][c2] = tmp;
+	}
 }
 
 // Offer direct access, but do not allow direct assignment
-const int &BioSequenceMatrix ::operator()(int r, int c) const {
-  return rowsArray[r][c];
+const int &BioSequenceMatrix ::operator()(int r, int c) const
+{
+	return rowsArray[r][c];
 }
 
-int &BioSequenceMatrix ::operator()(int r, int c) { return rowsArray[r][c]; }
+int &BioSequenceMatrix ::operator()(int r, int c)
+{
+	return rowsArray[r][c];
+}
 
-const int &BioSequenceMatrix ::GetValAt(int r, int c) const {
-  return rowsArray[r][c];
+const int &BioSequenceMatrix ::GetValAt(int r, int c) const
+{
+	return rowsArray[r][c];
 }
 
-void BioSequenceMatrix ::SetValAt(int r, int c, int val) {
-  rowsArray[r][c] = val;
+void BioSequenceMatrix ::SetValAt(int r, int c, int val)
+{
+	rowsArray[r][c] = val;
 }
 
-void BioSequenceMatrix ::GetAllSequences(vector<SEQUENCE> &seqs) const {
-  seqs.clear();
-  for (int i = 0; i < GetRowNum(); ++i) {
-    SEQUENCE row;
-    GetRow(i, row);
-    seqs.push_back(row);
-  }
+void BioSequenceMatrix ::GetAllSequences(vector<SEQUENCE> &seqs) const
+{
+	seqs.clear();
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		SEQUENCE row;
+		GetRow(i, row);
+		seqs.push_back(row);
+	}
 }
 
-void BioSequenceMatrix ::SubMatrix(int rt, int rb, int cl, int cr,
-                                   BioSequenceMatrix &submat) const {
-  // This function gets a submatrix, bounded from top row (rt), bottom row (rb)
-  // left column (cl), right column cr
-  submat.Clear();
-  submat.SetSize(rb - rt + 1, cr - cl + 1);
-
-  // Now we set rows
-  for (int i = rt; i <= rb; ++i) {
-    // get a vector of values
-    vector<int> row;
-    for (int j = cl; j <= cr; ++j) {
-      row.push_back(rowsArray[i][j]);
-    }
+void BioSequenceMatrix ::SubMatrix(int rt, int rb, int cl, int cr, BioSequenceMatrix &submat) const
+{
+	// This function gets a submatrix, bounded from top row (rt), bottom row (rb)
+	// left column (cl), right column cr
+	submat.Clear();
+	submat.SetSize(rb - rt + 1, cr - cl + 1);
 
-    // set row to submatrix
-    submat.SetRow(i - rt, row);
-  }
+	// Now we set rows
+	for (int i = rt; i <= rb; ++i)
+	{
+		// get a vector of values
+		vector<int> row;
+		for (int j = cl; j <= cr; ++j)
+		{
+			row.push_back(rowsArray[i][j]);
+		}
+
+		// set row to submatrix
+		submat.SetRow(i - rt, row);
+	}
 }
 
 // This function gets a submatrix from selected sites
-void BioSequenceMatrix ::SubMatrixSelectedSites(
-    const vector<int> &sites, BioSequenceMatrix &submat) const {
-  // This function gets a submatrix, with same number of rows but smaller number
-  // of sites
-  submat.Clear();
-  submat.SetSize(rowsArray.size(), sites.size());
-
-  // Now we set rows
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    // get a vector of values
-    vector<int> row;
-    for (unsigned int j = 0; j < sites.size(); ++j) {
-      int s = sites[j];
-      YW_ASSERT_INFO(s < GetColNum(),
-                     "SubMatrixSelectedSites: index out of range.");
-      row.push_back(rowsArray[i][s]);
-    }
+void BioSequenceMatrix ::SubMatrixSelectedSites(const vector<int> &sites, BioSequenceMatrix &submat) const
+{
+	// This function gets a submatrix, with same number of rows but smaller number of sites
+	submat.Clear();
+	submat.SetSize(rowsArray.size(), sites.size());
 
-    // set row to submatrix
-    submat.SetRow(i, row);
-  }
+	// Now we set rows
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		// get a vector of values
+		vector<int> row;
+		for (unsigned int j = 0; j < sites.size(); ++j)
+		{
+			int s = sites[j];
+			YW_ASSERT_INFO(s < GetColNum(), "SubMatrixSelectedSites: index out of range.");
+			row.push_back(rowsArray[i][s]);
+		}
+
+		// set row to submatrix
+		submat.SetRow(i, row);
+	}
 }
 
-void BioSequenceMatrix ::SubMatrixSelectedRows(
-    const vector<int> &rows, BioSequenceMatrix &submat) const {
-  // This function gets a submatrix, with same number of rows but smaller number
-  // of sites
-  submat.Clear();
-  submat.SetSize(rows.size(), nCols);
-
-  // Now set rows
-  for (unsigned int i = 0; i < rows.size(); ++i) {
-    vector<int> r;
-    GetRow(rows[i], r);
-    submat.SetRow(i, r);
-  }
+void BioSequenceMatrix ::SubMatrixSelectedRows(const vector<int> &rows, BioSequenceMatrix &submat) const
+{
+	// This function gets a submatrix made of the selected rows, keeping all sites (columns)
+	submat.Clear();
+	submat.SetSize(rows.size(), nCols);
+
+	// Now set rows
+	for (unsigned int i = 0; i < rows.size(); ++i)
+	{
+		vector<int> r;
+		GetRow(rows[i], r);
+		submat.SetRow(i, r);
+	}
 }
 
-void BioSequenceMatrix ::GetRow(int r, vector<int> &row) const {
-  row.clear();
-  for (int i = 0; i < nCols; ++i) {
-    row.push_back(rowsArray[r][i]);
-  }
+void BioSequenceMatrix ::GetRow(int r, vector<int> &row) const
+{
+	row.clear();
+	for (int i = 0; i < nCols; ++i)
+	{
+		row.push_back(rowsArray[r][i]);
+	}
 }
-void BioSequenceMatrix ::GetCol(int c, vector<int> &col) const {
-  col.clear();
-  for (int i = 0; i < GetRowNum(); ++i) {
-    col.push_back(rowsArray[i][c]);
-  }
+void BioSequenceMatrix ::GetCol(int c, vector<int> &col) const
+{
+	col.clear();
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		col.push_back(rowsArray[i][c]);
+	}
 }
 
-int BioSequenceMatrix ::FindRow(const SEQUENCE &seq) const {
-  // This function search the matrix to see if it contains this sequence
-  // return -1 if not found
-  YW_ASSERT_INFO(seq.size() == (unsigned int)GetColNum(),
-                 "Size does not match.");
-
-  for (int i = 0; i < GetRowNum(); ++i) {
-    bool fFound = true;
-    for (int j = 0; j < GetColNum(); ++j) {
-      if (rowsArray[i][j] != seq[j]) {
-        fFound = false;
-        break;
-      }
-    }
-    if (fFound == true) {
-      return i;
-    }
-  }
-  return -1;
+int BioSequenceMatrix ::FindRow(const SEQUENCE &seq) const
+{
+	// This function searches the matrix for a row equal to this sequence;
+	// it returns the index of the first matching row, or -1 if not found
+	YW_ASSERT_INFO(seq.size() == (unsigned int)GetColNum(), "Size does not match.");
+
+	for (int i = 0; i < GetRowNum(); ++i)
+	{
+		bool fFound = true;
+		for (int j = 0; j < GetColNum(); ++j)
+		{
+			if (rowsArray[i][j] != seq[j])
+			{
+				fFound = false;
+				break;
+			}
+		}
+		if (fFound == true)
+		{
+			return i;
+		}
+	}
+	return -1;
 }
-int BioSequenceMatrix ::FindColumn(const SEQUENCE &seq) const {
-  // This function search the matrix to see if it contains this sequence
-  // return -1 if not found
-  YW_ASSERT_INFO(seq.size() == (unsigned int)GetRowNum(),
-                 "Size does not match.");
-
-  for (int i = 0; i < GetColNum(); ++i) {
-    bool fFound = true;
-    for (int j = 0; j < GetRowNum(); ++j) {
-      if (rowsArray[j][i] != seq[j]) {
-        fFound = false;
-        break;
-      }
-    }
-    if (fFound == true) {
-      // cout << "Col ";
-      // DumpIntVec( seq );
-      // cout << "is in this matrix: ";
-      // this->Dump();
-      return i;
-    }
-  }
-  return -1;
+int BioSequenceMatrix ::FindColumn(const SEQUENCE &seq) const
+{
+	// This function searches the matrix for a column equal to this sequence;
+	// it returns the index of the first matching column, or -1 if not found
+	YW_ASSERT_INFO(seq.size() == (unsigned int)GetRowNum(), "Size does not match.");
+
+	for (int i = 0; i < GetColNum(); ++i)
+	{
+		bool fFound = true;
+		for (int j = 0; j < GetRowNum(); ++j)
+		{
+			if (rowsArray[j][i] != seq[j])
+			{
+				fFound = false;
+				break;
+			}
+		}
+		if (fFound == true)
+		{
+			//cout << "Col ";
+			//DumpIntVec( seq );
+			//cout << "is in this matrix: ";
+			//this->Dump();
+			return i;
+		}
+	}
+	return -1;
 }
 
-// This function removes a set of columns that are specified in the set as
-// duplicateSites
-void BioSequenceMatrix ::RemoveColumns(set<int> &duplicateSites) {
-  if (duplicateSites.size() == 0)
-    return;
-
-  // now we create  a new matrix with different size
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    int *buf = new int[nCols - duplicateSites.size()];
-    int cPos = 0;
-    for (int j = 0; j < nCols; ++j) {
-      if (duplicateSites.find(j) == duplicateSites.end()) {
-        // j is not duplicate, so we should copy it
-        buf[cPos++] = rowsArray[i][j];
-      }
-    }
+// This function removes a set of columns that are specified in the set as duplicateSites
+void BioSequenceMatrix ::RemoveColumns(set<int> &duplicateSites)
+{
+	if (duplicateSites.size() == 0)
+		return;
 
-    // now we free the memory of old buffer
-    delete[] rowsArray[i];
-    rowsArray[i] = buf;
-  }
+	// now we create  a new matrix with different size
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		int *buf = new int[nCols - duplicateSites.size()];
+		int cPos = 0;
+		for (int j = 0; j < nCols; ++j)
+		{
+			if (duplicateSites.find(j) == duplicateSites.end())
+			{
+				// j is not duplicate, so we should copy it
+				buf[cPos++] = rowsArray[i][j];
+			}
+		}
+
+		// now we free the memory of old buffer
+		delete[] rowsArray[i];
+		rowsArray[i] = buf;
+	}
 
-  nCols -= duplicateSites.size();
+	nCols -= duplicateSites.size();
 }
 
 // Remove one row from matrix
-void BioSequenceMatrix ::RemoveRows(set<int> &setRows) {
-  vector<int *> saveMat;
-
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    if (setRows.find(i) == setRows.end()) {
-      // Only if row i is not inside the rows set, we will save it
-      saveMat.push_back(rowsArray[i]);
-    } else {
-      // Ohterwise, we free it
-      delete[] rowsArray[i];
-    }
-  }
+void BioSequenceMatrix ::RemoveRows(set<int> &setRows)
+{
+	vector<int *> saveMat;
 
-  /*
-          Now revert back
-  */
-  rowsArray.clear();
-  rowsArray.swap(saveMat);
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		if (setRows.find(i) == setRows.end())
+		{
+			// Only if row i is not inside the rows set, we will save it
+			saveMat.push_back(rowsArray[i]);
+		}
+		else
+		{
+			// Otherwise, we free it
+			delete[] rowsArray[i];
+		}
+	}
+
+	/*
+		Now revert back
+	*/
+	rowsArray.clear();
+	rowsArray.swap(saveMat);
 }
 
-void BioSequenceMatrix ::SetSize(int nr, int nc) {
-  // This function initialize a nr by nc matrix
-  // and by default, fill in all 0 (false)
-  nCols = nc;
-  for (int i = 0; i < nr; ++i) {
-    int *buf = new int[nc];
-    for (int j = 0; j < nc; ++j) {
-      buf[j] = 0;
-    }
-    rowsArray.push_back(buf);
-  }
+void BioSequenceMatrix ::SetSize(int nr, int nc)
+{
+	// This function initialize a nr by nc matrix
+	// and by default, fill in all 0 (false)
+	nCols = nc;
+	for (int i = 0; i < nr; ++i)
+	{
+		int *buf = new int[nc];
+		for (int j = 0; j < nc; ++j)
+		{
+			buf[j] = 0;
+		}
+		rowsArray.push_back(buf);
+	}
 }
 
-void BioSequenceMatrix ::FindNgbrDupCompSites(set<int> *pRemovedSet) {
-  set<int> setOfRemovals; // contains sites to be removed
-  int cleft = 0;
-  while (cleft < nCols - 1) {
-    // Check to see if the next row  immediately is complement or not
-    if (IsColComplement(cleft, cleft + 1) == true ||
-        IsColDuplicate(cleft, cleft + 1) == true) {
-      setOfRemovals.insert(cleft + 1);
-      // cout << "Site " << cleft+1 << " is same/complement." << endl;
-    }
-    // Consider  next site
-    cleft++;
-  }
-  if (pRemovedSet != NULL) {
-    pRemovedSet->clear();
-    *pRemovedSet = setOfRemovals;
-  }
-  // Finally, remove columns
-  //	RemoveColumns( setOfRemovals );
+void BioSequenceMatrix ::FindNgbrDupCompSites(set<int> *pRemovedSet)
+{
+	set<int> setOfRemovals; // contains sites to be removed
+	int cleft = 0;
+	while (cleft < nCols - 1)
+	{
+		// Check whether the immediately following column is a complement or a duplicate of this one
+		if (IsColComplement(cleft, cleft + 1) == true || IsColDuplicate(cleft, cleft + 1) == true)
+		{
+			setOfRemovals.insert(cleft + 1);
+			//cout << "Site " << cleft+1 << " is same/complement." << endl;
+		}
+		// Consider  next site
+		cleft++;
+	}
+	if (pRemovedSet != NULL)
+	{
+		pRemovedSet->clear();
+		*pRemovedSet = setOfRemovals;
+	}
+	// Finally, remove columns
+	//	RemoveColumns( setOfRemovals );
 }
 
-void BioSequenceMatrix ::GetSeqsFeqs(map<SEQUENCE, int> &mapSeqFreqs) {
-  // Insert, for each sequence, how many times they appears in the matrix
-  mapSeqFreqs.clear();
-
-  for (int r = 0; r < GetRowNum(); ++r) {
-    SEQUENCE row;
-    GetRow(r, row);
-    if (mapSeqFreqs.find(row) == mapSeqFreqs.end()) {
-      map<SEQUENCE, int>::value_type p(row, 1);
-      mapSeqFreqs.insert(p);
-    } else {
-      mapSeqFreqs[row]++;
-    }
-  }
+void BioSequenceMatrix ::GetSeqsFeqs(map<SEQUENCE, int> &mapSeqFreqs)
+{
+	// Record, for each distinct sequence, how many times it appears in the matrix
+	mapSeqFreqs.clear();
+
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		SEQUENCE row;
+		GetRow(r, row);
+		if (mapSeqFreqs.find(row) == mapSeqFreqs.end())
+		{
+			map<SEQUENCE, int>::value_type p(row, 1);
+			mapSeqFreqs.insert(p);
+		}
+		else
+		{
+			mapSeqFreqs[row]++;
+		}
+	}
 }
 
-void BioSequenceMatrix ::GetSeqsOccurrence(
-    map<SEQUENCE, set<int> > &mapSeqOccurs) {
-  // For each distinct seq, find their occurance (which rows match this seq)
-  mapSeqOccurs.clear();
-
-  for (int r = 0; r < GetRowNum(); ++r) {
-    SEQUENCE row;
-    GetRow(r, row);
-    if (mapSeqOccurs.find(row) == mapSeqOccurs.end()) {
-      set<int> ss;
-      map<SEQUENCE, set<int> >::value_type p(row, ss);
-      mapSeqOccurs.insert(p);
-    }
-    mapSeqOccurs[row].insert(r);
-  }
+void BioSequenceMatrix ::GetSeqsOccurrence(map<SEQUENCE, set<int>> &mapSeqOccurs)
+{
+	// For each distinct seq, find its occurrences (which rows match this seq)
+	mapSeqOccurs.clear();
+
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		SEQUENCE row;
+		GetRow(r, row);
+		if (mapSeqOccurs.find(row) == mapSeqOccurs.end())
+		{
+			set<int> ss;
+			map<SEQUENCE, set<int>>::value_type p(row, ss);
+			mapSeqOccurs.insert(p);
+		}
+		mapSeqOccurs[row].insert(r);
+	}
 }
 
-bool BioSequenceMatrix ::IsIntervalConsistent(int r1, int left1, int right1,
-                                              int r2, int left2,
-                                              int right2) const {
-  // cout << "r1 = " << r1 << ", left1 = " << left1 << ", right1 = " << right1 ;
-  // cout << ", r2 = " << r2 << ", left2 = " << left2 << ", right2 = " << right2
-  // << endl;
-  // Test if the two interval are consistent (i.e. has the same value at the
-  // overlap
-  INTERVAL iv1(left1, right1);
-  INTERVAL iv2(left2, right2);
-  INTERVAL ivInt;
-  if (GetIntervalOverlap(iv1, iv2, ivInt) == false) {
-    // If the interval are not overlapping, yes, they are consistent
-    return true;
-  }
-  // cout << "intersection: left = " << ivInt.first << ", right = " <<
-  // ivInt.second << endl;
-  SEQUENCE row1;
-  GetRow(r1, row1);
-  SEQUENCE row1IV;
-  GetSeqInterval(row1, row1IV, ivInt.first, ivInt.second);
-  SEQUENCE row2;
-  GetRow(r2, row2);
-  SEQUENCE row2IV;
-  GetSeqInterval(row2, row2IV, ivInt.first, ivInt.second);
-  return (row1IV == row2IV);
+bool BioSequenceMatrix ::IsIntervalConsistent(int r1, int left1, int right1, int r2, int left2, int right2) const
+{
+	//cout << "r1 = " << r1 << ", left1 = " << left1 << ", right1 = " << right1 ;
+	//cout << ", r2 = " << r2 << ", left2 = " << left2 << ", right2 = " << right2  << endl;
+	// Test if the two intervals are consistent (i.e. the two rows have the same values on the overlap)
+	INTERVAL iv1(left1, right1);
+	INTERVAL iv2(left2, right2);
+	INTERVAL ivInt;
+	if (GetIntervalOverlap(iv1, iv2, ivInt) == false)
+	{
+		// If the intervals do not overlap, they are trivially consistent
+		return true;
+	}
+	//cout << "intersection: left = " << ivInt.first << ", right = " << ivInt.second << endl;
+	SEQUENCE row1;
+	GetRow(r1, row1);
+	SEQUENCE row1IV;
+	GetSeqInterval(row1, row1IV, ivInt.first, ivInt.second);
+	SEQUENCE row2;
+	GetRow(r2, row2);
+	SEQUENCE row2IV;
+	GetSeqInterval(row2, row2IV, ivInt.first, ivInt.second);
+	return (row1IV == row2IV);
 }
 
-bool BioSequenceMatrix ::IsMissingValue() {
+bool BioSequenceMatrix ::IsMissingValue()
+{
 #if 0
     // A rather inefficient way of doing things
     if( fMissingValue == true )
@@ -921,116 +1047,139 @@ bool BioSequenceMatrix ::IsMissingValue() {
     }
 #endif
 
-  // now double check to make sure
-  for (int r = 0; r < GetRowNum(); ++r) {
-    for (int c = 0; c < GetColNum(); ++c) {
-      if (GetValAt(r, c) == MISSING_VALUE_BIT) {
-        //    fMissingValue = true;
-        return true;
-      }
-    }
-  }
-  return false;
+	// now double check to make sure
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		for (int c = 0; c < GetColNum(); ++c)
+		{
+			if (GetValAt(r, c) == MISSING_VALUE_BIT)
+			{
+				//    fMissingValue = true;
+				return true;
+			}
+		}
+	}
+	return false;
 }
 
-bool BioSequenceMatrix ::IsMissingValueInSite(int c) {
-  // now double check to make sure
-  for (int r = 0; r < GetRowNum(); ++r) {
-    if (GetValAt(r, c) == MISSING_VALUE_BIT) {
-      //    fMissingValue = true;
-      return true;
-    }
-  }
-  return false;
+bool BioSequenceMatrix ::IsMissingValueInSite(int c)
+{
+	// now double check to make sure
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		if (GetValAt(r, c) == MISSING_VALUE_BIT)
+		{
+			//    fMissingValue = true;
+			return true;
+		}
+	}
+	return false;
 }
 
-bool BioSequenceMatrix ::IsMissingValueInRow(int r) {
-  return GetMissingValueNumInRow(r) > 0;
+bool BioSequenceMatrix ::IsMissingValueInRow(int r)
+{
+	return GetMissingValueNumInRow(r) > 0;
 }
 
-int BioSequenceMatrix ::GetMissingValueNumInRow(int r) {
-  int res = 0;
-  // now double check to make sure
-  for (int c = 0; c < GetColNum(); ++c) {
-    if (GetValAt(r, c) == MISSING_VALUE_BIT) {
-      res++;
-    }
-  }
-  return res;
+int BioSequenceMatrix ::GetMissingValueNumInRow(int r)
+{
+	int res = 0;
+	// now double check to make sure
+	for (int c = 0; c < GetColNum(); ++c)
+	{
+		if (GetValAt(r, c) == MISSING_VALUE_BIT)
+		{
+			res++;
+		}
+	}
+	return res;
 }
 
-void BioSequenceMatrix ::MapDupToNodup(map<int, int> &mapDupToNodup) const {
-  // create a mapping from no-duplicate to duplicate indices
-  set<int> rowsProcessed;
+void BioSequenceMatrix ::MapDupToNodup(map<int, int> &mapDupToNodup) const
+{
+	// create a mapping from each original (possibly duplicated) row index to its no-duplicate index
+	set<int> rowsProcessed;
 
-  int rowNoDup = 0;
-  for (int r = 0; r < GetRowNum(); ++r) {
-    if (rowsProcessed.find(r) != rowsProcessed.end()) {
-      continue;
-    }
-    SEQUENCE seq;
-    GetRow(r, seq);
-    set<int> identRows;
-    GetMultiplictyForRow(seq, identRows);
-    // cout << "seq = ";
-    // DumpSequence( seq );
-    // DumpIntSet( identRows );
-    // Add to map
-    for (set<int>::iterator it = identRows.begin(); it != identRows.end();
-         ++it) {
-      int ss = *it;
-      mapDupToNodup.insert(map<int, int>::value_type(ss, rowNoDup));
-    }
-    rowNoDup++;
-    UnionSets(rowsProcessed, identRows);
-  }
+	int rowNoDup = 0;
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		if (rowsProcessed.find(r) != rowsProcessed.end())
+		{
+			continue;
+		}
+		SEQUENCE seq;
+		GetRow(r, seq);
+		set<int> identRows;
+		GetMultiplictyForRow(seq, identRows);
+		//cout << "seq = ";
+		//DumpSequence( seq );
+		//DumpIntSet( identRows );
+		// Add to map
+		for (set<int>::iterator it = identRows.begin(); it != identRows.end(); ++it)
+		{
+			int ss = *it;
+			mapDupToNodup.insert(map<int, int>::value_type(ss, rowNoDup));
+		}
+		rowNoDup++;
+		UnionSets(rowsProcessed, identRows);
+	}
 }
 
-int BioSequenceMatrix ::GetNodupRowsNum(vector<int> *pListUniqeRowIndex) const {
-  int res = 0;
-  for (int r = 0; r < GetRowNum(); ++r) {
-    SEQUENCE seq;
-    GetRow(r, seq);
-    // cout << "seq = ";
-    // DumpSequence( seq );
-    // DumpIntSet( identRows );
-    // Add to map
-    bool fUnique = true;
-    for (int r2 = 0; r2 < r; ++r2) {
-      SEQUENCE seq2;
-      GetRow(r2, seq2);
-      if (seq2 == seq) {
-        fUnique = false;
-        break;
-      }
-    }
-    if (fUnique == true) {
-      res++;
-      if (pListUniqeRowIndex != NULL) {
-        pListUniqeRowIndex->push_back(r);
-      }
-    }
-  }
-  return res;
+int BioSequenceMatrix ::GetNodupRowsNum(vector<int> *pListUniqeRowIndex) const
+{
+	int res = 0;
+	for (int r = 0; r < GetRowNum(); ++r)
+	{
+		SEQUENCE seq;
+		GetRow(r, seq);
+		//cout << "seq = ";
+		//DumpSequence( seq );
+		//DumpIntSet( identRows );
+		// Row r counts as unique only if no earlier row is identical to it
+		bool fUnique = true;
+		for (int r2 = 0; r2 < r; ++r2)
+		{
+			SEQUENCE seq2;
+			GetRow(r2, seq2);
+			if (seq2 == seq)
+			{
+				fUnique = false;
+				break;
+			}
+		}
+		if (fUnique == true)
+		{
+			res++;
+			if (pListUniqeRowIndex != NULL)
+			{
+				pListUniqeRowIndex->push_back(r);
+			}
+		}
+	}
+	return res;
 }
 
-////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////
 //		Inernal utility functions
-////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////
 
-bool BioSequenceMatrix ::CmpColumns(int c1, int c2) {
-  bool res = true;
+bool BioSequenceMatrix ::CmpColumns(int c1, int c2)
+{
+	bool res = true;
 
-  if (c1 == c2) {
-    return true;
-  }
+	if (c1 == c2)
+	{
+		return true;
+	}
 
-  for (unsigned int i = 0; i < rowsArray.size(); ++i) {
-    if (rowsArray[i][c1] != rowsArray[i][c2]) {
-      res = false;
-      break;
-    }
-  }
+	for (unsigned int i = 0; i < rowsArray.size(); ++i)
+	{
+		if (rowsArray[i][c1] != rowsArray[i][c2])
+		{
+			res = false;
+			break;
+		}
+	}
 
-  return res;
+	return res;
 }
diff --git a/trisicell/external/scistree/BioSequenceMatrix.h b/trisicell/external/scistree/BioSequenceMatrix.h
index 55f2157..282a69f 100644
--- a/trisicell/external/scistree/BioSequenceMatrix.h
+++ b/trisicell/external/scistree/BioSequenceMatrix.h
@@ -1,13 +1,13 @@
 #ifndef BIO_SEQUENCE_MATRIX_H
 #define BIO_SEQUENCE_MATRIX_H
 
-#include <fstream>
-#include <iostream>
 #include <list>
-#include <map>
+#include <vector>
 #include <set>
 #include <string>
-#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
 using namespace std;
 
 #include "Utils.h"
@@ -20,96 +20,90 @@ using namespace std;
 // ***************************************************************************
 // Define a reusable binary matrix class
 // ***************************************************************************
-class BioSequenceMatrix {
+class BioSequenceMatrix
+{
 public:
-  // BioSequenceMatrix();
-  virtual ~BioSequenceMatrix() = 0;
+    //BioSequenceMatrix();
+    virtual ~BioSequenceMatrix() = 0;
 
-  // Important interface functions we need
-  virtual bool IsDataValid(int val) = 0; // check to see if this data is good
-                                         // for this class e.g. for genotype
-                                         // data, 0, 1, 2
-  void SetSize(int nr, int nc);
+    // Important interface functions we need
+    virtual bool IsDataValid(int val) = 0; // check to see if this data is good for this class
+                                           // e.g. for genotype data, 0, 1, 2
+    void SetSize(int nr, int nc);
 
-  // Matrix editing functions
-  void AppendRow(const vector<int> &row);
-  void AppendSetOfRows(const set<SEQUENCE> &rows);
-  void AppendRows(const vector<SEQUENCE> &rows);
-  void InsertColumns(const vector<SEQUENCE> &sitesValue,
-                     const vector<int> &sitesPos);
-  void SetRow(int i, const vector<int> &valNew);
-  void SetCol(int i, const vector<int> &valNew);
-  void Clear();
-  void Copy(const BioSequenceMatrix &rhs);
-  virtual bool ReadFromFile(ifstream &inFile, bool fSkipFirstLine = true);
-  // virtual bool ReadFromFilePartial( ifstream &inFile, bool fSkipFirstLine );
-  void Dump() const;
-  void OutputToFile(const char *fileName) const;
-  void OutputToFile(ofstream &outFile) const;
-  void RemoveRow(int rowIndex);
-  void ExchangeColumns(int r1, int r2);
-  void RemoveColumns(set<int> &duplicateSites);
-  void RemoveRows(set<int> &setRows);
-  void TrimDupRows(set<int> *pTrimedRows = NULL,
-                   vector<pair<int, int> > *pTrimRowInfo = NULL);
-  virtual void FindNgbrDupCompSites(set<int> *pRemovedSet = NULL);
-  virtual bool IsColComplement(int c1, int c2) = 0;
-  virtual bool IsColDuplicate(int c1, int c2) = 0;
-  void AppendMatrixByCol(const BioSequenceMatrix &appendedMat);
-  void AppendMatrixByRow(const BioSequenceMatrix &appendedMat);
+    // Matrix editing functions
+    void AppendRow(const vector<int> &row);
+    void AppendSetOfRows(const set<SEQUENCE> &rows);
+    void AppendRows(const vector<SEQUENCE> &rows);
+    void InsertColumns(const vector<SEQUENCE> &sitesValue, const vector<int> &sitesPos);
+    void SetRow(int i, const vector<int> &valNew);
+    void SetCol(int i, const vector<int> &valNew);
+    void Clear();
+    void Copy(const BioSequenceMatrix &rhs);
+    virtual bool ReadFromFile(ifstream &inFile, bool fSkipFirstLine = true);
+    //virtual bool ReadFromFilePartial( ifstream &inFile, bool fSkipFirstLine );
+    void Dump() const;
+    void OutputToFile(const char *fileName) const;
+    void OutputToFile(ofstream &outFile) const;
+    void RemoveRow(int rowIndex);
+    void ExchangeColumns(int r1, int r2);
+    void RemoveColumns(set<int> &duplicateSites);
+    void RemoveRows(set<int> &setRows);
+    void TrimDupRows(set<int> *pTrimedRows = NULL, vector<pair<int, int>> *pTrimRowInfo = NULL);
+    virtual void FindNgbrDupCompSites(set<int> *pRemovedSet = NULL);
+    virtual bool IsColComplement(int c1, int c2) = 0;
+    virtual bool IsColDuplicate(int c1, int c2) = 0;
+    void AppendMatrixByCol(const BioSequenceMatrix &appendedMat);
+    void AppendMatrixByRow(const BioSequenceMatrix &appendedMat);
 
-  // Overload operator for [], like a[1, 2]
-  const int &operator()(int r, int c) const;
-  int &operator()(int r, int c);
-  const int &GetValAt(int r, int c) const;
-  void SetValAt(int r, int c, int val);
+    // Overload operator() for element access, like a(1, 2)
+    const int &operator()(int r, int c) const;
+    int &operator()(int r, int c);
+    const int &GetValAt(int r, int c) const;
+    void SetValAt(int r, int c, int val);
 
-  // Access matrix
-  bool IsEmpty() const { return GetColNum() == 0 || GetRowNum() == 0; }
-  int GetColNum() const { return nCols; }
-  int GetRowNum() const { return rowsArray.size(); }
-  void GetRow(int r, vector<int> &row) const;
-  void GetCol(int c, vector<int> &col) const;
-  int FindRow(const SEQUENCE &seq) const;
-  int FindColumn(const SEQUENCE &seq) const;
-  void SubMatrix(int rt, int rb, int cl, int cr,
-                 BioSequenceMatrix &submat) const;
-  void SubMatrixSelectedSites(const vector<int> &sites,
-                              BioSequenceMatrix &submat) const;
-  void SubMatrixSelectedRows(const vector<int> &rows,
-                             BioSequenceMatrix &submat) const;
-  void GetAllSequences(vector<SEQUENCE> &seqs) const;
-  void GetSeqsFeqs(map<SEQUENCE, int> &mapSeqFreqs);
-  void GetSeqsOccurrence(map<SEQUENCE, set<int> > &mapSeqOccurs);
-  virtual int GetMajorityState(int site) = 0;
-  void DumpRowMultiplicity() const;
-  int GetMultiplictyForRow(int r) const;
-  int GetMultiplictyForRow(const SEQUENCE &seq) const;
-  int GetMultiplictyForRow(const SEQUENCE &seq, set<int> &identRows) const;
-  int GetMultiplictyForRowIV(int r, int left, int right) const;
-  void GetColMultiplicityMap(vector<int> &listColMulti) const;
-  bool IsIntervalConsistent(int r1, int left1, int right1, int r2, int left2,
-                            int right2) const;
-  bool IsMissingValue();
-  bool IsMissingValueInSite(int c);
-  bool IsMissingValueInRow(int r);
-  int GetMissingValueNumInRow(int r);
-  void MapDupToNodup(map<int, int> &mapDupToNodup) const;
-  int GetNodupRowsNum(vector<int> *pListUniqeRowIndex) const;
+    // Access matrix
+    bool IsEmpty() const { return GetColNum() == 0 || GetRowNum() == 0; }
+    int GetColNum() const { return nCols; }
+    int GetRowNum() const { return rowsArray.size(); }
+    void GetRow(int r, vector<int> &row) const;
+    void GetCol(int c, vector<int> &col) const;
+    int FindRow(const SEQUENCE &seq) const;
+    int FindColumn(const SEQUENCE &seq) const;
+    void SubMatrix(int rt, int rb, int cl, int cr, BioSequenceMatrix &submat) const;
+    void SubMatrixSelectedSites(const vector<int> &sites, BioSequenceMatrix &submat) const;
+    void SubMatrixSelectedRows(const vector<int> &rows, BioSequenceMatrix &submat) const;
+    void GetAllSequences(vector<SEQUENCE> &seqs) const;
+    void GetSeqsFeqs(map<SEQUENCE, int> &mapSeqFreqs);
+    void GetSeqsOccurrence(map<SEQUENCE, set<int>> &mapSeqOccurs);
+    virtual int GetMajorityState(int site) = 0;
+    void DumpRowMultiplicity() const;
+    int GetMultiplictyForRow(int r) const;
+    int GetMultiplictyForRow(const SEQUENCE &seq) const;
+    int GetMultiplictyForRow(const SEQUENCE &seq, set<int> &identRows) const;
+    int GetMultiplictyForRowIV(int r, int left, int right) const;
+    void GetColMultiplicityMap(vector<int> &listColMulti) const;
+    bool IsIntervalConsistent(int r1, int left1, int right1, int r2, int left2, int right2) const;
+    bool IsMissingValue();
+    bool IsMissingValueInSite(int c);
+    bool IsMissingValueInRow(int r);
+    int GetMissingValueNumInRow(int r);
+    void MapDupToNodup(map<int, int> &mapDupToNodup) const;
+    int GetNodupRowsNum(vector<int> *pListUniqeRowIndex) const;
 
 protected:
-  // Some functions
-  bool CmpColumns(int c1, int c2);
+    // Some functions
+    bool CmpColumns(int c1, int c2);
 
-  // Internal data
-  // we represent a binary matrix as bool type
-  vector<int *> rowsArray; // array of rows
-  int nCols;               // number of sites (columns)
+    // Internal data
+    // matrix entries are stored as int values, one int array per row
+    vector<int *> rowsArray; // array of rows
+    int nCols;               // number of sites (columns)
 
 private:
-  // Disable certain operations
-  BioSequenceMatrix &operator=(const BioSequenceMatrix &rhs) { return *this; }
-  // bool fMissingValue;
+    // Disable certain operations
+    BioSequenceMatrix &operator=(const BioSequenceMatrix &rhs) { return *this; }
+    //bool fMissingValue;
 };
 
-#endif // BIO_SEQUENCE_MATRIX_H
+#endif //BIO_SEQUENCE_MATRIX_H
diff --git a/trisicell/external/scistree/GenotypeMatrix.cpp b/trisicell/external/scistree/GenotypeMatrix.cpp
index e4480d2..15da158 100644
--- a/trisicell/external/scistree/GenotypeMatrix.cpp
+++ b/trisicell/external/scistree/GenotypeMatrix.cpp
@@ -1,253 +1,341 @@
 #include "GenotypeMatrix.h"
 #include <cmath>
-#include <cstdio>
 #include <cstdlib>
+#include <cstdio>
 
 // ***************************************************************************
 // Define a reusable binary matrix class
 // ***************************************************************************
 
-GenotypeMatrix ::GenotypeMatrix() { nCols = 0; }
+GenotypeMatrix ::GenotypeMatrix()
+{
+    nCols = 0;
+}
 
-GenotypeMatrix ::~GenotypeMatrix() {
-  // Need to free up data if needed
-  Clear();
+GenotypeMatrix ::~GenotypeMatrix()
+{
+    // Need to free up data if needed
+    Clear();
 }
 
-GenotypeMatrix ::GenotypeMatrix(int nr, int nc) { SetSize(nr, nc); }
+GenotypeMatrix ::GenotypeMatrix(int nr, int nc)
+{
+    SetSize(nr, nc);
+}
 
-GenotypeMatrix ::GenotypeMatrix(const GenotypeMatrix &rhs) { Copy(rhs); }
+GenotypeMatrix ::GenotypeMatrix(const GenotypeMatrix &rhs)
+{
+    Copy(rhs);
+}
 
-GenotypeMatrix &GenotypeMatrix ::operator=(const GenotypeMatrix &rhs) {
-  Clear();
+GenotypeMatrix &GenotypeMatrix ::operator=(const GenotypeMatrix &rhs)
+{
+    Clear();
 
-  Copy(rhs);
+    Copy(rhs);
 
-  return *this;
+    return *this;
 }
 
-bool GenotypeMatrix ::IsDataValid(int val) {
-  if (val == 0 || val == 1 || val == 2) {
-    return true;
-  } else {
-    return false;
-  }
+bool GenotypeMatrix ::IsDataValid(int val)
+{
+    if (val == 0 || val == 1 || val == 2)
+    {
+        return true;
+    }
+    else
+    {
+        return false;
+    }
 }
 
-void GenotypeMatrix ::PreSolve() {
-  // Generate the companion rows
-  SetupCompanionColumns();
+void GenotypeMatrix ::PreSolve()
+{
+    // Generate the companion rows
+    SetupCompanionColumns();
 }
 
-bool GenotypeMatrix ::AreColumnsCompanion(int c1, int c2) {
-  if (c1 == c2) {
-    return false;
-  }
-  if (c1 > c2) {
-    int tmp = c1;
-    c1 = c2;
-    c2 = tmp;
-  }
-  COLUMN_PAIR cp(c1, c2);
-  if (companionRows.find(cp) == companionRows.end() ||
-      companionRows[cp].size() == 0) {
-    return false;
-  } else {
-    return true;
-  }
+bool GenotypeMatrix ::AreColumnsCompanion(int c1, int c2)
+{
+    if (c1 == c2)
+    {
+        return false;
+    }
+    if (c1 > c2)
+    {
+        int tmp = c1;
+        c1 = c2;
+        c2 = tmp;
+    }
+    COLUMN_PAIR cp(c1, c2);
+    if (companionRows.find(cp) == companionRows.end() || companionRows[cp].size() == 0)
+    {
+        return false;
+    }
+    else
+    {
+        return true;
+    }
 }
 
-bool GenotypeMatrix ::AreColumnsForcedInPhase(int c1, int c2) {
-  if (c1 == c2) {
-    return false;
-  }
-  if (c1 > c2) {
-    int tmp = c1;
-    c1 = c2;
-    c2 = tmp;
-  }
-  COLUMN_PAIR cp(c1, c2);
-  if (forcedColumnPairs.find(cp) == forcedColumnPairs.end() ||
-      forcedColumnPairs[cp] == 1) {
-    return false;
-  } else {
-    return true;
-  }
+bool GenotypeMatrix ::AreColumnsForcedInPhase(int c1, int c2)
+{
+    if (c1 == c2)
+    {
+        return false;
+    }
+    if (c1 > c2)
+    {
+        int tmp = c1;
+        c1 = c2;
+        c2 = tmp;
+    }
+    COLUMN_PAIR cp(c1, c2);
+    if (forcedColumnPairs.find(cp) == forcedColumnPairs.end() || forcedColumnPairs[cp] == 1)
+    {
+        return false;
+    }
+    else
+    {
+        return true;
+    }
 }
 
-bool GenotypeMatrix ::AreColumnsForcedOutPhase(int c1, int c2) {
-  if (c1 == c2) {
-    return false;
-  }
-  if (c1 > c2) {
-    int tmp = c1;
-    c1 = c2;
-    c2 = tmp;
-  }
-  COLUMN_PAIR cp(c1, c2);
-  if (forcedColumnPairs.find(cp) == forcedColumnPairs.end() ||
-      forcedColumnPairs[cp] == 0) {
+bool GenotypeMatrix ::AreColumnsForcedOutPhase(int c1, int c2)
+{
+    if (c1 == c2)
+    {
+        return false;
+    }
+    if (c1 > c2)
+    {
+        int tmp = c1;
+        c1 = c2;
+        c2 = tmp;
+    }
+    COLUMN_PAIR cp(c1, c2);
+    if (forcedColumnPairs.find(cp) == forcedColumnPairs.end() || forcedColumnPairs[cp] == 0)
+    {
+        return false;
+    }
+    else
+    {
+        return true;
+    }
+}
+
+bool GenotypeMatrix ::AreColumnsComplete(int c1, int c2)
+{
+    if (c1 == c2)
+    {
+        return false;
+    }
+    if (c1 > c2)
+    {
+        int tmp = c1;
+        c1 = c2;
+        c2 = tmp;
+    }
+    COLUMN_PAIR cp(c1, c2);
+    for (int i = 0; i < completePairs.size(); ++i)
+    {
+        if (completePairs[i] == cp)
+        {
+            return true;
+        }
+    }
     return false;
-  } else {
-    return true;
-  }
 }
 
-bool GenotypeMatrix ::AreColumnsComplete(int c1, int c2) {
-  if (c1 == c2) {
+int GenotypeMatrix ::GetNumTwosInRow(int r)
+{
+    // For now, it is not optimized yet
+    // we simply count the number of twos
+    // later we can rely on preprocessing
+    int res = 0;
+    for (int i = 0; i < GetColNum(); ++i)
+    {
+        if (rowsArray[r][i] == 2)
+        {
+            ++res;
+        }
+    }
+    return res;
+}
+
+bool GenotypeMatrix ::IsSiteTrival(int site)
+{
+    int numTwos = 0;
+    int numZeros = 0;
+    int numOnes = 0;
+    for (int i = 0; i < GetRowNum(); ++i)
+    {
+        if (rowsArray[i][site] == 0)
+        {
+            numZeros++;
+        }
+        else if (rowsArray[i][site] == 1)
+        {
+            numOnes++;
+        }
+        else if (rowsArray[i][site] == 2)
+        {
+            numTwos++;
+        }
+        else
+        {
+            YW_ASSERT(false);
+        }
+    }
+    if (numTwos <= 1 && (numZeros == 0 || numOnes == 0))
+    {
+        return true;
+    }
+    else
+    {
+        return false;
+    }
+}
+
+bool GenotypeMatrix ::IsColComplement(int c1, int c2)
+{
+    YW_ASSERT_INFO(false, "Not implemented");
     return false;
-  }
-  if (c1 > c2) {
-    int tmp = c1;
-    c1 = c2;
-    c2 = tmp;
-  }
-  COLUMN_PAIR cp(c1, c2);
-  for (int i = 0; i < completePairs.size(); ++i) {
-    if (completePairs[i] == cp) {
-      return true;
-    }
-  }
-  return false;
-}
-
-int GenotypeMatrix ::GetNumTwosInRow(int r) {
-  // For now, it is not optimized yet
-  // we simply count the number of twos
-  // later we can rely on preprocessing
-  int res = 0;
-  for (int i = 0; i < GetColNum(); ++i) {
-    if (rowsArray[r][i] == 2) {
-      ++res;
-    }
-  }
-  return res;
-}
-
-bool GenotypeMatrix ::IsSiteTrival(int site) {
-  int numTwos = 0;
-  int numZeros = 0;
-  int numOnes = 0;
-  for (int i = 0; i < GetRowNum(); ++i) {
-    if (rowsArray[i][site] == 0) {
-      numZeros++;
-    } else if (rowsArray[i][site] == 1) {
-      numOnes++;
-    } else if (rowsArray[i][site] == 2) {
-      numTwos++;
-    } else {
-      YW_ASSERT(false);
-    }
-  }
-  if (numTwos <= 1 && (numZeros == 0 || numOnes == 0)) {
+}
+
+bool GenotypeMatrix ::IsColDuplicate(int c1, int c2)
+{
+    for (int i = 0; i < GetRowNum(); ++i)
+    {
+        if (rowsArray[i][c1] != rowsArray[i][c2])
+        {
+            return false;
+        }
+    }
     return true;
-  } else {
-    return false;
-  }
-}
-
-bool GenotypeMatrix ::IsColComplement(int c1, int c2) {
-  YW_ASSERT_INFO(false, "Not implemented");
-  return false;
-}
-
-bool GenotypeMatrix ::IsColDuplicate(int c1, int c2) {
-  for (int i = 0; i < GetRowNum(); ++i) {
-    if (rowsArray[i][c1] != rowsArray[i][c2]) {
-      return false;
-    }
-  }
-  return true;
-}
-
-int GenotypeMatrix ::GetMajorityState(int site) {
-  int numTwos = 0;
-  int numZeros = 0;
-  int numOnes = 0;
-  for (int i = 0; i < GetRowNum(); ++i) {
-    if (rowsArray[i][site] == 0) {
-      numZeros++;
-    } else if (rowsArray[i][site] == 1) {
-      numOnes++;
-    } else if (rowsArray[i][site] == 2) {
-      numTwos++;
-    } else {
-      YW_ASSERT(false);
-    }
-  }
-  int ma = 0;
-  int max = numZeros;
-  if (max < numOnes) {
-    max = numOnes;
-    ma = 1;
-  }
-  if (max < numTwos) {
-    max = numTwos;
-    ma = 2;
-  }
-  return ma;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////
-// IMPLEMENTATION DETAILS
-void GenotypeMatrix ::SetupCompanionColumns() {
-  // This function checks the data and fill in the companion rows for every pair
-  // of columns
-  for (int i = 0; i < nCols; ++i) {
-    for (int j = i + 1; j < nCols; ++j) {
-      COLUMN_PAIR cp(i, j);
-      set<int> cmpnRows;
-      // The following 4 variables shows what are know already for (i,j)
-      bool found00 = false, found01 = false;
-      bool found10 = false, found11 = false;
-      ;
-      for (int k = 0; k < GetRowNum(); ++k) {
-        if (rowsArray[k][i] == 2 && rowsArray[k][j] == 2) {
-          cmpnRows.insert(k);
-        } else if (rowsArray[k][i] == 0 &&
-                   rowsArray[k][j] == 2) // Now check for forced pattern
+}
+
+int GenotypeMatrix ::GetMajorityState(int site)
+{
+    int numTwos = 0;
+    int numZeros = 0;
+    int numOnes = 0;
+    for (int i = 0; i < GetRowNum(); ++i)
+    {
+        if (rowsArray[i][site] == 0)
         {
-          found00 = true;
-          found01 = true;
-        } else if (rowsArray[k][i] == 1 && rowsArray[k][j] == 2) {
-          found10 = true;
-          found11 = true;
-        } else if (rowsArray[k][i] == 2 && rowsArray[k][j] == 0) {
-          found00 = true;
-          found10 = true;
-        } else if (rowsArray[k][i] == 2 && rowsArray[k][j] == 1) {
-          found01 = true;
-          found11 = true;
-        } else if (rowsArray[k][i] == 0 && rowsArray[k][j] == 0) {
-          found00 = true;
-        } else if (rowsArray[k][i] == 0 && rowsArray[k][j] == 1) {
-          found01 = true;
-        } else if (rowsArray[k][i] == 1 && rowsArray[k][j] == 0) {
-          found10 = true;
-        } else if (rowsArray[k][i] == 1 && rowsArray[k][j] == 1) {
-          found11 = true;
+            numZeros++;
         }
-      }
-      // Now we add this to our map
-      if (cmpnRows.size() > 0) {
-        companionRows.insert(COMPANION_ROW_MAP::value_type(cp, cmpnRows));
-      }
-
-      // We also record the forced pattern
-      if (found00 == true && found11 == true && found01 == true &&
-          found10 == true) {
-        // In this case we already have a complete pair
-        completePairs.push_back(cp);
-      } else {
-        // This is not a complete pair
-        if (found00 == true && found11 == true) {
-          forcedColumnPairs.insert(
-              FORCED_COL_MAP::value_type(cp, 0)); // forced in phase
-        } else if (found01 == true && found10 == true) {
-          forcedColumnPairs.insert(
-              FORCED_COL_MAP::value_type(cp, 1)); // forced in phase
+        else if (rowsArray[i][site] == 1)
+        {
+            numOnes++;
+        }
+        else if (rowsArray[i][site] == 2)
+        {
+            numTwos++;
+        }
+        else
+        {
+            YW_ASSERT(false);
+        }
+    }
+    int ma = 0;
+    int max = numZeros;
+    if (max < numOnes)
+    {
+        max = numOnes;
+        ma = 1;
+    }
+    if (max < numTwos)
+    {
+        max = numTwos;
+        ma = 2;
+    }
+    return ma;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// IMPLEMENTATION DETAILS
+void GenotypeMatrix ::SetupCompanionColumns()
+{
+    // This function checks the data and fills in the companion rows for every pair of columns
+    for (int i = 0; i < nCols; ++i)
+    {
+        for (int j = i + 1; j < nCols; ++j)
+        {
+            COLUMN_PAIR cp(i, j);
+            set<int> cmpnRows;
+            // The following 4 variables show what is already known for (i,j)
+            bool found00 = false, found01 = false;
+            bool found10 = false, found11 = false;
+            ;
+            for (int k = 0; k < GetRowNum(); ++k)
+            {
+                if (rowsArray[k][i] == 2 && rowsArray[k][j] == 2)
+                {
+                    cmpnRows.insert(k);
+                }
+                else if (rowsArray[k][i] == 0 && rowsArray[k][j] == 2) // Now check for forced pattern
+                {
+                    found00 = true;
+                    found01 = true;
+                }
+                else if (rowsArray[k][i] == 1 && rowsArray[k][j] == 2)
+                {
+                    found10 = true;
+                    found11 = true;
+                }
+                else if (rowsArray[k][i] == 2 && rowsArray[k][j] == 0)
+                {
+                    found00 = true;
+                    found10 = true;
+                }
+                else if (rowsArray[k][i] == 2 && rowsArray[k][j] == 1)
+                {
+                    found01 = true;
+                    found11 = true;
+                }
+                else if (rowsArray[k][i] == 0 && rowsArray[k][j] == 0)
+                {
+                    found00 = true;
+                }
+                else if (rowsArray[k][i] == 0 && rowsArray[k][j] == 1)
+                {
+                    found01 = true;
+                }
+                else if (rowsArray[k][i] == 1 && rowsArray[k][j] == 0)
+                {
+                    found10 = true;
+                }
+                else if (rowsArray[k][i] == 1 && rowsArray[k][j] == 1)
+                {
+                    found11 = true;
+                }
+            }
+            // Now we add this to our map
+            if (cmpnRows.size() > 0)
+            {
+                companionRows.insert(COMPANION_ROW_MAP::value_type(cp, cmpnRows));
+            }
+
+            // We also record the forced pattern
+            if (found00 == true && found11 == true && found01 == true && found10 == true)
+            {
+                // In this case we already have a complete pair
+                completePairs.push_back(cp);
+            }
+            else
+            {
+                // This is not a complete pair
+                if (found00 == true && found11 == true)
+                {
+                    forcedColumnPairs.insert(FORCED_COL_MAP::value_type(cp, 0)); // forced in phase
+                }
+                else if (found01 == true && found10 == true)
+                {
+                    forcedColumnPairs.insert(FORCED_COL_MAP::value_type(cp, 1)); // forced out of phase
+                }
+            }
         }
-      }
     }
-  }
 }
diff --git a/trisicell/external/scistree/GenotypeMatrix.h b/trisicell/external/scistree/GenotypeMatrix.h
index 5254bcc..f24a4e8 100644
--- a/trisicell/external/scistree/GenotypeMatrix.h
+++ b/trisicell/external/scistree/GenotypeMatrix.h
@@ -1,61 +1,60 @@
 #ifndef GENOTYPE_MATRIX_H
 #define GENOTYPE_MATRIX_H
 
-#include <fstream>
-#include <iostream>
 #include <list>
-#include <map>
+#include <vector>
 #include <set>
 #include <string>
-#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
 using namespace std;
 
-#include "BioSequenceMatrix.h"
 #include "Utils.h"
+#include "BioSequenceMatrix.h"
 
 typedef pair<int, int> COLUMN_PAIR;
 
 // ***************************************************************************
 // Define a reusable binary matrix class
 // ***************************************************************************
-class GenotypeMatrix : public BioSequenceMatrix {
+class GenotypeMatrix : public BioSequenceMatrix
+{
 public:
-  GenotypeMatrix();
-  ~GenotypeMatrix();
-  GenotypeMatrix(int nr, int nc);
-
-  // Support assignment/copy constructor
-  GenotypeMatrix(const GenotypeMatrix &rhs);
-  GenotypeMatrix &operator=(const GenotypeMatrix &rhs);
-
-  // Important interface functions we need
-  virtual bool IsDataValid(int val); // check to see if this data is good for
-                                     // this class e.g. for genotype data, 0, 1,
-                                     // 2
-  virtual bool IsColComplement(int c1, int c2);
-  virtual bool IsColDuplicate(int c1, int c2);
-  virtual int GetMajorityState(int site);
-
-  // DPPH needs these functions
-  void PreSolve(); // perform neccessary preprocessing. For now, assume DPPH
-  bool AreColumnsCompanion(int c1, int c2);
-  bool AreColumnsForcedInPhase(int c1, int c2);
-  bool AreColumnsForcedOutPhase(int c1, int c2);
-  bool AreColumnsComplete(int c1, int c2);
-  int GetNumTwosInRow(int r);
-  bool IsSiteTrival(int site);
+    GenotypeMatrix();
+    ~GenotypeMatrix();
+    GenotypeMatrix(int nr, int nc);
+
+    // Support assignment/copy constructor
+    GenotypeMatrix(const GenotypeMatrix &rhs);
+    GenotypeMatrix &operator=(const GenotypeMatrix &rhs);
+
+    // Important interface functions we need
+    virtual bool IsDataValid(int val); // check to see if this data is good for this class
+                                       // e.g. for genotype data, 0, 1, 2
+    virtual bool IsColComplement(int c1, int c2);
+    virtual bool IsColDuplicate(int c1, int c2);
+    virtual int GetMajorityState(int site);
+
+    // DPPH needs these functions
+    void PreSolve(); // perform necessary preprocessing. For now, assume DPPH
+    bool AreColumnsCompanion(int c1, int c2);
+    bool AreColumnsForcedInPhase(int c1, int c2);
+    bool AreColumnsForcedOutPhase(int c1, int c2);
+    bool AreColumnsComplete(int c1, int c2);
+    int GetNumTwosInRow(int r);
+    bool IsSiteTrival(int site);
 
 private:
-  // Internal functions
-  void SetupCompanionColumns(); // Initialize companion rows
-
-  // Private data structures
-  typedef map<COLUMN_PAIR, set<int> > COMPANION_ROW_MAP;
-  COMPANION_ROW_MAP companionRows;
-  typedef map<COLUMN_PAIR, int> FORCED_COL_MAP;
-  FORCED_COL_MAP forcedColumnPairs; // value = 1 if out of phase, = 0 if in
-                                    // phase, otherwise no entry
-  vector<COLUMN_PAIR> completePairs;
+    // Internal functions
+    void SetupCompanionColumns(); // Initialize companion rows
+
+    // Private data structures
+    typedef map<COLUMN_PAIR, set<int>> COMPANION_ROW_MAP;
+    COMPANION_ROW_MAP companionRows;
+    typedef map<COLUMN_PAIR, int> FORCED_COL_MAP;
+    FORCED_COL_MAP forcedColumnPairs; // value = 1 if out of phase, = 0 if in phase, otherwise no entry
+    vector<COLUMN_PAIR> completePairs;
 };
 
-#endif // GENOTYPE_MATRIX_H
+#endif //GENOTYPE_MATRIX_H
diff --git a/trisicell/external/scistree/MarginalTree.cpp b/trisicell/external/scistree/MarginalTree.cpp
index a4d6242..f123efb 100644
--- a/trisicell/external/scistree/MarginalTree.cpp
+++ b/trisicell/external/scistree/MarginalTree.cpp
@@ -1,2225 +1,2443 @@
 #include "MarginalTree.h"
+#include <stack>
+#include <queue>
 #include "PhylogenyTreeBasic.h"
 #include "UnWeightedGraph.h"
 #include "Utils4.h"
-#include <queue>
-#include <stack>
 
-//////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Define a utility class
 // GLobal utility function
 
-// static void OutputQuotedString(ofstream &outFile, const char *buf)
+//static void OutputQuotedString(ofstream &outFile, const char *buf)
 //{
 //	outFile << '"';
 //	outFile << buf;
 //	outFile << '"';
 //}
 
-void RemapLeafIntLabelsTaxaMap(MarginalTree &mtree,
-                               map<string, string> &mapper) {
-  // map the leaf labels to new integer labels
-  for (int i = 0; i < mtree.GetNumLeaves(); ++i) {
-    int lbl = mtree.GetLabel(i);
-    char buf[100];
-    sprintf(buf, "%d", lbl);
-    string strbuf = buf;
-    YW_ASSERT_INFO(mapper.find(strbuf) != mapper.end(), "Fail to find");
-    string strLbl = mapper[strbuf];
-    int lblNewInt;
-    sscanf(strLbl.c_str(), "%d", &lblNewInt);
-    mtree.SetLabel(i, lblNewInt);
-  }
-}
-
-void RemapMargTree(MarginalTree &mtree, TaxaMapper &refTMapper) {
-  //
-  // map the leaf labels to new integer labels
-  // cout << "RemapMargTree: mtree:" << mtree.GetNewick() << endl;
-  // mtree.Dump();
-  for (int i = 0; i < mtree.GetNumLeaves(); ++i) {
-    int lbl = mtree.GetLabel(i);
-    string strlbl = refTMapper.GetString(lbl);
-    int lblNew = lbl;
-    sscanf(strlbl.c_str(), "%d", &lblNew);
-    mtree.SetLabel(i, lblNew);
-  }
-}
-
-static bool ReadinOneMarginalTree(ifstream &inFile, int numNodes,
-                                  MarginalTree &tree) {
-  // first read in the node ids
-  for (int i = 0; i < numNodes; ++i) {
-    int tmp;
-    inFile >> tmp;
-    tree.listNodeLabels.push_back(tmp);
-  }
-  for (int i = 0; i < numNodes; ++i) {
-    int tmp;
-    inFile >> tmp;
-    tree.listParentNodePos.push_back(tmp);
-  }
-  for (int i = 0; i < numNodes; ++i) {
-    double tmp;
-    inFile >> tmp;
-    tree.listEdgeDist.push_back(tmp);
-  }
-
-  return true;
-}
-
-static void ReadNewickLen(const string &strNewick,
-                          map<set<int>, double> &mapClusterLen,
-                          TaxaMapper *pTMapper) {
-  // cout << "ReadNewickLen: strNewick = " << strNewick << endl;
-  // the first letter must be (
-  // YW_ASSERT_INFO( strNewick.length() > 0 && strNewick[0] == '(', "Bad Newick
-  // format" );
-
-  const char *strNwBuf = strNewick.c_str();
-
-  // reverse and find the last ) to get dist
-  int posLastState = -1;
-  double bLen = 1.0;
-  for (int i = (int)strNewick.length() - 1; i >= 0; --i) {
-    if (strNewick[i] == ':') {
-      float fLen = 1.0;
-      sscanf(strNwBuf + i + 1, "%f", &fLen);
-      bLen = fLen;
-      if (strNewick[i] != ')') {
-        posLastState = i - 2;
-        break;
-      }
-    } else if (strNewick[i] == ')') {
-      // should also stop
-      posLastState = i - 1;
-      break;
-    }
-  }
-  // accumlate all the labels
-  PhylogenyTreeBasic phTree;
-  phTree.ConsOnNewick(strNewick, -1, false, pTMapper);
-
-  // see if zero is in, if not, must have 1 and decrease by 1
-  set<int> lvids;
-  phTree.GetLeaveIds(lvids);
-  // cout << "ReadNewickLen: lvids = ";
-  // DumpIntSet( lvids );
-
-  // add a record
-  mapClusterLen.insert(map<set<int>, double>::value_type(lvids, bLen));
-  // cout << "Subtree len = " << bLen << ", for leaf set = ";
-  // DumpIntSet( lvids );
-
-  // given newick format, read in the edge length of the clusters
-  // we will perform this recursively
-  // first find the position where it is the first ,
-  int posSplit = -1;
-  int netParen = 0;
-  for (int i = 0; i < (int)strNewick.length(); ++i) {
-    if (strNewick[i] == '(') {
-      netParen++;
-    } else if (strNewick[i] == ')') {
-      netParen--;
-    }
-    if (netParen == 1 && strNewick[i] == ',') {
-      posSplit = i;
-      break;
-    }
-  }
-  // YW_ASSERT_INFO( netParen >= 0 && posSplit >= 1, "Bad Newick format" );
-
-  // now recurisvely to two children (if needed)
-  if (posSplit >= 0) {
-    YW_ASSERT_INFO(posSplit - 1 >= 1, "Newick format wrong");
-    string strLeft = strNewick.substr(1, posSplit - 1);
-    ReadNewickLen(strLeft, mapClusterLen, pTMapper);
-    YW_ASSERT_INFO(posSplit + 1 <= posLastState, "Newick format wrong");
-    string strRight = strNewick.substr(posSplit + 1, posLastState - posSplit);
-    ReadNewickLen(strRight, mapClusterLen, pTMapper);
-  }
-}
-
-static int UpdateMTreeWithNWString(MarginalTree &treeToChange, int &leafNext,
-                                   int &nodeIntNext, string &strNewick,
-                                   TaxaMapper *pTMapper) {
-  // cout << "UpdateMTreeWithNWString: strNewick = " << strNewick  << ",
-  // leafNext = " << leafNext << ", nodeIntNext = " << nodeIntNext << endl;
-  // a recursive call to change all the nodes from nodeToChnage to the correct
-  // length specified by the string strNewick (and all the underlying nodes)
-  // return the current node
-
-  // conslidate the newick string first
-  string strNewickUse = strNewick;
-  NewickUtils ::ConsolidateSinglChildChain(strNewickUse);
-  if (strNewickUse != strNewick) {
-    // cout << "**Newick: " << strNewick << ", after consolidate: " <<
-    // strNewickUse << endl;
-  }
-  // first find current length by finding the rightmost : (outside any ))
-  // now find the separator in order to proceed recurisvely
-  string strNW1, strNW2;
-  bool fNonAtom = NewickUtils ::FindSplitIn(strNewickUse, strNW1, strNW2);
-  int nodeCurrent;
-  if (fNonAtom == true) {
-    // now recursive
-    if (nodeIntNext < treeToChange.GetNumLeaves()) {
-      treeToChange.Dump();
-      cout << "nodeIntNext: " << nodeIntNext << ", ";
-      cout << "Tree to chagne: " << treeToChange.GetNewick() << endl;
-    }
-    YW_ASSERT_INFO(nodeIntNext >= treeToChange.GetNumLeaves(),
-                   "UpdateBranchLenInfo: internal node out of range");
-    nodeCurrent = nodeIntNext--;
-  } else {
-    YW_ASSERT_INFO(leafNext < treeToChange.GetNumLeaves(),
-                   "UpdateBranchLenInfo: Leaf out of range");
-    // this node is a leaf
-    nodeCurrent = leafNext++;
-
-    // int idNew;
-    // sscanf(strNewick.c_str(), "%d", &idNew);
-    int idNew = TaxaMapper ::GetIdFromStr(strNewickUse, pTMapper);
-    treeToChange.SetLabel(nodeCurrent, idNew);
-  }
-  // cout << "nodeCurrent = " << nodeCurrent << endl;
-
-  // by default, set branch length to be 1.0
-  float lenCur = 1.0;
-  size_t posSep1 = strNewickUse.rfind(':');
-  size_t posSep2 = strNewickUse.rfind(')');
-  if (posSep1 != string::npos &&
-      (posSep1 > posSep2 || posSep2 == string::npos)) {
-    // yes, there is a length specified for this
-    sscanf(strNewickUse.c_str() + (int)posSep1 + 1, "%f", &lenCur);
-  }
-  double lenCurUse = lenCur;
-  treeToChange.SetBranchLen(nodeCurrent, lenCurUse);
-  // cout << "Found length: " << lenCurUse << " for node " << nodeCurrent <<
-  // endl;
-  // now recurse
-  // cout << "In UpdateMTreeWithNWString: \n";
-  // treeToChange.Dump();
-  if (fNonAtom == true) {
-    // now recursive
-    int nodeChild1 = UpdateMTreeWithNWString(treeToChange, leafNext,
-                                             nodeIntNext, strNW1, pTMapper);
-    int nodeChild2 = UpdateMTreeWithNWString(treeToChange, leafNext,
-                                             nodeIntNext, strNW2, pTMapper);
-
-    // update the par pos
-    treeToChange.SetParent(nodeChild1, nodeCurrent, false);
-    treeToChange.SetParent(nodeChild2, nodeCurrent, false);
-  }
-
-  return nodeCurrent;
-}
-
-bool ReadinMarginalTrees(ifstream &inFile, vector<MarginalTree> &treeList) {
-  // first read in the number of chrom
-  int numLeaves;
-  inFile >> numLeaves;
-  int nTreeNodes;
-  inFile >> nTreeNodes;
-  int nTrees;
-  inFile >> nTrees;
-  treeList.clear();
-  for (int i = 0; i < nTrees; ++i) {
-    // cout << "Reading TREE " << i << endl;
-    MarginalTree tree;
-    ReadinOneMarginalTree(inFile, nTreeNodes, tree);
-    tree.numLeaves = numLeaves;
-    treeList.push_back(tree);
-
-    // tree.Dump();
-    // YW_ASSERT_INFO(false, "early abort");
-  }
-  return true;
-}
-
-void CollapseEquivTrees(const vector<MarginalTree> &listOrigTrees,
-                        vector<MarginalTree> &listUniqTrees,
-                        vector<int> &listMultiplicity) {
-  // collect ordered-leaf tree newick strings
-  vector<string> listRepOrderedLeafList;
-  for (int tr = 0; tr < (int)listOrigTrees.size(); ++tr) {
-    int numLeaves = listOrigTrees[tr].GetNumLeaves();
-    // make the tree binary
-    // listTreesMT[tr].Binarize();
-    // cout << "Processing gene tree: ";
-    // listTreesMT[tr].Dump();
-    PhylogenyTreeBasic *pphtree = new PhylogenyTreeBasic;
-    pphtree->ConsOnParPosList(listOrigTrees[tr].listParentNodePos, numLeaves,
-                              true);
-    pphtree->UpdateIntLabel(listOrigTrees[tr].listNodeLabels);
-    pphtree->Order();
-    string strNewick1;
-    pphtree->ConsNewick(strNewick1);
-    delete pphtree;
-    //		cout << "Constructed one gene Tree = " << strNewick1 << endl;
-    listRepOrderedLeafList.push_back(strNewick1);
-    //		const int ROOT_LABLE = 7;
-    // TestReroot(pphtree, ROOT_LABLE);
-  }
-
-  listUniqTrees.clear();
-  listMultiplicity.clear();
-  vector<string> listStoredLeafList;
-
-  // check each tree in orig list
-  for (int tr = 0; tr < (int)listRepOrderedLeafList.size(); ++tr) {
-    bool fFound = false;
-    for (int trNew = 0; trNew < (int)listStoredLeafList.size(); ++trNew) {
-      if (listStoredLeafList[trNew] == listRepOrderedLeafList[tr]) {
-        // add it
-        listMultiplicity[trNew]++;
-        fFound = true;
-        break;
-      }
-    }
-    if (fFound == false) {
-      listUniqTrees.push_back(listOrigTrees[tr]);
-      listMultiplicity.push_back(1);
-      listStoredLeafList.push_back(listRepOrderedLeafList[tr]);
-    }
-  }
-}
-
-bool ReadinMarginalTreesNewick(ifstream &inFile, int numLeaves,
-                               vector<MarginalTree> &treeList,
-                               TaxaMapper *pTMapper, bool fDup) {
-  // NOTE: RETURN TRUE IF NO LABEL ADJUSTMENT IS DONE
-  // RETURN FALSE IF WE SWITCHED LABEL BY DECREASING BY ONE
-  // figure out leave num
-  bool fNoChange = true;
-  int nLvs = numLeaves;
-
-  // read marginal trees in newick format
-  // here there is no preamble, one line per tree
-  while (inFile.eof() == false) {
-    string treeNewick;
-    inFile >> treeNewick;
-    if (treeNewick.size() == 0) {
-      break;
-    }
-    // cout << "newick tree = " << treeNewick << endl;
+void RemapLeafIntLabelsTaxaMap(MarginalTree &mtree, map<string, string> &mapper)
+{
+    // map the leaf labels to new integer labels
+    for (int i = 0; i < mtree.GetNumLeaves(); ++i)
+    {
+        int lbl = mtree.GetLabel(i);
+        char buf[100];
+        sprintf(buf, "%d", lbl);
+        string strbuf = buf;
+        YW_ASSERT_INFO(mapper.find(strbuf) != mapper.end(), "Fail to find");
+        string strLbl = mapper[strbuf];
+        int lblNewInt;
+        sscanf(strLbl.c_str(), "%d", &lblNewInt);
+        mtree.SetLabel(i, lblNewInt);
+    }
+}
 
-    //#if 0
-    // update numleaves
-    multiset<string> setLabels;
-    NewickUtils ::RetrieveLabelSet(treeNewick, setLabels);
-    nLvs = setLabels.size();
-    //#endif
+void RemapMargTree(MarginalTree &mtree, TaxaMapper &refTMapper)
+{
     //
-    PhylogenyTreeBasic phTree;
-    // if( fDup == false )
-    //{
-    phTree.ConsOnNewick(treeNewick, -1, false, pTMapper);
-    //}
-    // else
-    //{
-    //	phTree.ConsOnNewickDupLabels(treeNewick, pTMapper);
-    //}
+    // map the leaf labels to new integer labels
+    //cout << "RemapMargTree: mtree:" << mtree.GetNewick() << endl;
+    //mtree.Dump();
+    for (int i = 0; i < mtree.GetNumLeaves(); ++i)
+    {
+        int lbl = mtree.GetLabel(i);
+        string strlbl = refTMapper.GetString(lbl);
+        int lblNew = lbl;
+        sscanf(strlbl.c_str(), "%d", &lblNew);
+        mtree.SetLabel(i, lblNew);
+    }
+}
+
+static bool ReadinOneMarginalTree(ifstream &inFile, int numNodes, MarginalTree &tree)
+{
+    // first read in the node ids
+    for (int i = 0; i < numNodes; ++i)
+    {
+        int tmp;
+        inFile >> tmp;
+        tree.listNodeLabels.push_back(tmp);
+    }
+    for (int i = 0; i < numNodes; ++i)
+    {
+        int tmp;
+        inFile >> tmp;
+        tree.listParentNodePos.push_back(tmp);
+    }
+    for (int i = 0; i < numNodes; ++i)
+    {
+        double tmp;
+        inFile >> tmp;
+        tree.listEdgeDist.push_back(tmp);
+    }
+
+    return true;
+}
+
+static void ReadNewickLen(const string &strNewick, map<set<int>, double> &mapClusterLen, TaxaMapper *pTMapper)
+{
+    // Records the branch length of the subtree written as strNewick in mapClusterLen, keyed by that subtree's set of leaf ids, then recurses into the two parts around the first depth-1 comma.
+    // NOTE: a leading '(' is not enforced -- the assertion below is disabled
+    //YW_ASSERT_INFO( strNewick.length() > 0 && strNewick[0] == '(', "Bad Newick format" );
 
-    if (pTMapper != NULL) {
-      pTMapper->SetInitialized(true);
+    const char *strNwBuf = strNewick.c_str();
+
+    // scan backwards: a ':' supplies this subtree's length (default 1.0); posLastState is used below as the index where the right child ends
+    int posLastState = -1;
+    double bLen = 1.0;
+    for (int i = (int)strNewick.length() - 1; i >= 0; --i)
+    {
+        if (strNewick[i] == ':')
+        {
+            float fLen = 1.0;
+            sscanf(strNwBuf + i + 1, "%f", &fLen);
+            bLen = fLen;
+            if (strNewick[i] != ')') // NOTE(review): always true here, because strNewick[i] == ':' in this branch
+            {
+                posLastState = i - 2;
+                break;
+            }
+        }
+        else if (strNewick[i] == ')')
+        {
+            // no trailing length: stop at the closing ')' and keep the default length
+            posLastState = i - 1;
+            break;
+        }
     }
-    // string strTr;
-    // phTree.ConsNewick(strTr);
-    // cout << "After reconstruction: strTr = " << strTr << endl;
-    // see if zero is in, if not, must have 1 and decrease by 1
+    // build a tree from the string just to collect the leaf ids below it
+    PhylogenyTreeBasic phTree;
+    phTree.ConsOnNewick(strNewick, -1, false, pTMapper);
+
+    // the set of leaf ids is the key under which this subtree's length is stored
     set<int> lvids;
     phTree.GetLeaveIds(lvids);
-    if (lvids.find(0) == lvids.end()) {
-      YW_ASSERT_INFO(lvids.find(1) != lvids.end(), "Wrong");
-
-      // decrease by one
-      phTree.InitPostorderWalk();
-      while (true) {
-        TreeNode *pn = phTree.NextPostorderWalk();
-        if (pn == NULL) {
-          break; // done with all nodes
-        }
-        if (pn->IsLeaf() == true) {
-          // cout << "Found leaf id: " << pn->GetID() << endl;
-          pn->SetID(pn->GetID() - 1);
-          // YW: 8/18/11, changed. NEED VERIFICATION
-          // char buf[1000];
-          // sprintf(buf, "%d", pn->GetID() );
-          // string lblNew = buf;
-          // pn->SetLabel( lblNew );
-        }
-      }
-
-      // mark the change
-      fNoChange = false;
-    }
-
-    vector<int> nidsList, nparsList;
-    phTree.GetNodeParInfo(nidsList, nparsList);
-    // cout << "nidsList: ";
-    // DumpIntVec( nidsList );
-    // cout << "nparsList" << ": ";
-    // DumpIntVec(nparsList);
-    // phTree.GetNodeParInfoNew(nidsList, nparsList);
-    // phTree.GetNodeParInfo(nidsList, nparsList);
-    // if( nLvs <= 0 )
-    //{
-    // YW: 09072010, ASSUME the tree is binary tree
-    nLvs = (phTree.GetNumVertices() + 1) / 2;
-    // cout << "nlvs = " << nLvs << endl;
-    //}
-    MarginalTree tree;
-    InitMarginalTree(tree, nLvs, nidsList, nparsList);
-    // cout << "After init, mtree = ";
-    // tree.Dump();
-    // YW: 01/30/12, sort the leaf first
-    tree.SortByLeafId();
-    // cout << "After sorting, tree = ";
-    // tree.Dump();
-
-    // cout << "Initialize a tree: ";
-    // tree.Dump();
-    treeList.push_back(tree);
-    // cout << "Newick format of this marginal tree: ";
-    // cout << tree.GetNewick() << endl;
-  }
-  return fNoChange;
-}
-
-bool ReadinMarginalTreesNewickWLenString(const string &strNewick, int numLeaves,
-                                         MarginalTree &treeOut,
-                                         bool fStartFromZero,
-                                         TaxaMapper *pTMapper) {
-  // YW_ASSERT_INFO(pTMapper != NULL, "Stop here2");
-  // mark the change
-  bool fNoChange = true;
-  // NOTE: RETURN TRUE IF NO LABEL ADJUSTMENT IS DONE
-  // RETURN FALSE IF WE SWITCHED LABEL BY DECREASING BY ONE
-  // figure out leave num
-
-  if (strNewick.size() == 0) {
+    //cout << "ReadNewickLen: lvids = ";
+    //DumpIntSet( lvids );
+
+    // add a record; map::insert keeps the existing entry if this leaf set was already recorded
+    mapClusterLen.insert(map<set<int>, double>::value_type(lvids, bLen));
+    //cout << "Subtree len = " << bLen << ", for leaf set = ";
+    //DumpIntSet( lvids );
+
+    // given newick format, read in the edge length of the clusters
+    // we will perform this recursively
+    // first find the first ',' at parenthesis depth 1 (the separator between the two children)
+    int posSplit = -1;
+    int netParen = 0;
+    for (int i = 0; i < (int)strNewick.length(); ++i)
+    {
+        if (strNewick[i] == '(')
+        {
+            netParen++;
+        }
+        else if (strNewick[i] == ')')
+        {
+            netParen--;
+        }
+        if (netParen == 1 && strNewick[i] == ',')
+        {
+            posSplit = i;
+            break;
+        }
+    }
+    //YW_ASSERT_INFO( netParen >= 0 && posSplit >= 1, "Bad Newick format" );
+
+    // now recurse into the two children; with no depth-1 comma (posSplit < 0) there is nothing further to do
+    if (posSplit >= 0)
+    {
+        YW_ASSERT_INFO(posSplit - 1 >= 1, "Newick format wrong");
+        string strLeft = strNewick.substr(1, posSplit - 1);
+        ReadNewickLen(strLeft, mapClusterLen, pTMapper);
+        YW_ASSERT_INFO(posSplit + 1 <= posLastState, "Newick format wrong");
+        string strRight = strNewick.substr(posSplit + 1, posLastState - posSplit);
+        ReadNewickLen(strRight, mapClusterLen, pTMapper);
+    }
+}
+
+static int UpdateMTreeWithNWString(MarginalTree &treeToChange, int &leafNext, int &nodeIntNext, string &strNewick, TaxaMapper *pTMapper)
+{
+    // Recursively fills treeToChange from the Newick (sub)string strNewick: picks a node index for this
+    // subtree's root, sets its label (leaves only) and branch length, then recurses into both children
+    // and attaches them. leafNext is post-incremented per leaf and nodeIntNext post-decremented per
+    // internal node, both through the references. Returns the index chosen for this subtree's root.
+
+    // consolidate on a copy; strNewick itself is only read in this function
+    string strNewickUse = strNewick;
+    NewickUtils ::ConsolidateSinglChildChain(strNewickUse);
+    if (strNewickUse != strNewick)
+    {
+        // intentionally empty: only a debug trace (now disabled) was here
+    }
+    // split into the two child substrings; the result doubles as the "internal node" flag
+    // (presumably false means a single leaf -- the code below treats it that way)
+    string strNW1, strNW2;
+    bool fNonAtom = NewickUtils ::FindSplitIn(strNewickUse, strNW1, strNW2);
+    int nodeCurrent;
+    if (fNonAtom == true)
+    {
+        // internal node: if the index has run down into the leaf range, dump diagnostics before asserting
+        if (nodeIntNext < treeToChange.GetNumLeaves())
+        {
+            treeToChange.Dump();
+            cout << "nodeIntNext: " << nodeIntNext << ", ";
+            cout << "Tree to chagne: " << treeToChange.GetNewick() << endl;
+        }
+        YW_ASSERT_INFO(nodeIntNext >= treeToChange.GetNumLeaves(), "UpdateBranchLenInfo: internal node out of range");
+        nodeCurrent = nodeIntNext--;
+    }
+    else
+    {
+        YW_ASSERT_INFO(leafNext < treeToChange.GetNumLeaves(), "UpdateBranchLenInfo: Leaf out of range");
+        // leaf: take the next leaf index
+        nodeCurrent = leafNext++;
+
+        // the label is resolved through TaxaMapper::GetIdFromStr
+        // (an older version parsed the string directly with sscanf "%d")
+        int idNew = TaxaMapper ::GetIdFromStr(strNewickUse, pTMapper);
+        treeToChange.SetLabel(nodeCurrent, idNew);
+    }
+    //cout << "nodeCurrent = " << nodeCurrent << endl;
+
+    // branch length defaults to 1.0 (also kept if sscanf below fails to parse a number)
+    float lenCur = 1.0;
+    size_t posSep1 = strNewickUse.rfind(':');
+    size_t posSep2 = strNewickUse.rfind(')');
+    if (posSep1 != string::npos && (posSep1 > posSep2 || posSep2 == string::npos))
+    {
+        // the last ':' comes after the last ')' (or there is no ')'), so it is this node's own length
+        sscanf(strNewickUse.c_str() + (int)posSep1 + 1, "%f", &lenCur);
+    }
+    double lenCurUse = lenCur;
+    treeToChange.SetBranchLen(nodeCurrent, lenCurUse);
+    //cout << "Found length: " << lenCurUse << " for node " << nodeCurrent << endl;
+    // now recurse
+    //cout << "In UpdateMTreeWithNWString: \n";
+    //treeToChange.Dump();
+    if (fNonAtom == true)
+    {
+        // strNW1 is processed before strNW2, so leaf indices are handed out in that order
+        int nodeChild1 = UpdateMTreeWithNWString(treeToChange, leafNext, nodeIntNext, strNW1, pTMapper);
+        int nodeChild2 = UpdateMTreeWithNWString(treeToChange, leafNext, nodeIntNext, strNW2, pTMapper);
+
+        // attach both children to this node
+        treeToChange.SetParent(nodeChild1, nodeCurrent, false);
+        treeToChange.SetParent(nodeChild2, nodeCurrent, false);
+    }
+
+    return nodeCurrent;
+}
+
+bool ReadinMarginalTrees(ifstream &inFile, vector<MarginalTree> &treeList)
+{
+    // header: three ints -- number of leaves, nodes per tree, number of trees; that many trees follow. treeList is cleared first; always returns true.
+    int numLeaves;
+    inFile >> numLeaves;
+    int nTreeNodes;
+    inFile >> nTreeNodes;
+    int nTrees;
+    inFile >> nTrees;
+    treeList.clear();
+    for (int i = 0; i < nTrees; ++i)
+    {
+        // the return value of ReadinOneMarginalTree is ignored; numLeaves comes from the header, not from the tree data
+        MarginalTree tree;
+        ReadinOneMarginalTree(inFile, nTreeNodes, tree);
+        tree.numLeaves = numLeaves;
+        treeList.push_back(tree);
+
+        //tree.Dump();
+        //YW_ASSERT_INFO(false, "early abort");
+    }
+    return true;
+}
+
+void CollapseEquivTrees(const vector<MarginalTree> &listOrigTrees, vector<MarginalTree> &listUniqTrees, vector<int> &listMultiplicity)
+{
+    // Groups trees whose ordered Newick strings are identical: listUniqTrees gets the first tree of each group, listMultiplicity the group sizes (same order). Step 1: one string per input tree.
+    vector<string> listRepOrderedLeafList;
+    for (int tr = 0; tr < (int)listOrigTrees.size(); ++tr)
+    {
+        int numLeaves = listOrigTrees[tr].GetNumLeaves();
+        // rebuild the tree as a PhylogenyTreeBasic from the parent positions, apply the node labels,
+        // call Order(), then emit its Newick string (no binarization is done here)
+        //cout << "Processing gene tree: ";
+        //listTreesMT[tr].Dump();
+        PhylogenyTreeBasic *pphtree = new PhylogenyTreeBasic;
+        pphtree->ConsOnParPosList(listOrigTrees[tr].listParentNodePos, numLeaves, true);
+        pphtree->UpdateIntLabel(listOrigTrees[tr].listNodeLabels);
+        pphtree->Order();
+        string strNewick1;
+        pphtree->ConsNewick(strNewick1);
+        delete pphtree;
+        //		cout << "Constructed one gene Tree = " << strNewick1 << endl;
+        listRepOrderedLeafList.push_back(strNewick1);
+        //		const int ROOT_LABLE = 7;
+        //TestReroot(pphtree, ROOT_LABLE);
+    }
+
+    listUniqTrees.clear();
+    listMultiplicity.clear();
+    vector<string> listStoredLeafList;
+
+    // step 2: compare each string against the representatives kept so far (linear scan per tree)
+    for (int tr = 0; tr < (int)listRepOrderedLeafList.size(); ++tr)
+    {
+        bool fFound = false;
+        for (int trNew = 0; trNew < (int)listStoredLeafList.size(); ++trNew)
+        {
+            if (listStoredLeafList[trNew] == listRepOrderedLeafList[tr])
+            {
+                // same string as an earlier tree: just bump that representative's count
+                listMultiplicity[trNew]++;
+                fFound = true;
+                break;
+            }
+        }
+        if (fFound == false)
+        {
+            listUniqTrees.push_back(listOrigTrees[tr]);
+            listMultiplicity.push_back(1);
+            listStoredLeafList.push_back(listRepOrderedLeafList[tr]);
+        }
+    }
+}
+
+bool ReadinMarginalTreesNewick(ifstream &inFile, int numLeaves, vector<MarginalTree> &treeList, TaxaMapper *pTMapper, bool fDup)
+{
+    // Reads Newick trees from inFile and appends one MarginalTree per tree to treeList (treeList is not cleared).
+    // Returns true if no label adjustment was done, false if any tree had its leaf ids decreased by one.
+    // NOTE(review): fDup is not used by any live code here (the duplicate-label path is commented out).
+    bool fNoChange = true;
+    int nLvs = numLeaves;
+
+    // there is no preamble; operator>> splits on whitespace, so each whitespace-delimited token is
+    // taken as one tree, and an empty read ends the loop
+    while (inFile.eof() == false)
+    {
+        string treeNewick;
+        inFile >> treeNewick;
+        if (treeNewick.size() == 0)
+        {
+            break;
+        }
+        //cout << "newick tree = " << treeNewick << endl;
+
+        //#if 0
+        // leaf count from the label multiset; NOTE(review): nLvs is overwritten again before its only use below
+        multiset<string> setLabels;
+        NewickUtils ::RetrieveLabelSet(treeNewick, setLabels);
+        nLvs = setLabels.size();
+        //#endif
+        //
+        PhylogenyTreeBasic phTree;
+        //if( fDup == false )
+        //{
+        phTree.ConsOnNewick(treeNewick, -1, false, pTMapper);
+        //}
+        //else
+        //{
+        //	phTree.ConsOnNewickDupLabels(treeNewick, pTMapper);
+        //}
+
+        if (pTMapper != NULL)
+        {
+            pTMapper->SetInitialized(true);
+        }
+        //string strTr;
+        //phTree.ConsNewick(strTr);
+        //cout << "After reconstruction: strTr = " << strTr << endl;
+        // if leaf id 0 is absent, id 1 must be present and all leaf ids are shifted down by one
+        set<int> lvids;
+        phTree.GetLeaveIds(lvids);
+        if (lvids.find(0) == lvids.end())
+        {
+            YW_ASSERT_INFO(lvids.find(1) != lvids.end(), "Wrong");
+
+            // decrease every leaf id by one (non-leaf nodes are left alone)
+            phTree.InitPostorderWalk();
+            while (true)
+            {
+                TreeNode *pn = phTree.NextPostorderWalk();
+                if (pn == NULL)
+                {
+                    break; // done with all nodes
+                }
+                if (pn->IsLeaf() == true)
+                {
+                    //cout << "Found leaf id: " << pn->GetID() << endl;
+                    pn->SetID(pn->GetID() - 1);
+                    // YW: 8/18/11, changed. NEED VERIFICATION
+                    //char buf[1000];
+                    //sprintf(buf, "%d", pn->GetID() );
+                    //string lblNew = buf;
+                    //pn->SetLabel( lblNew );
+                }
+            }
+
+            // mark the change
+            fNoChange = false;
+        }
+
+        vector<int> nidsList, nparsList;
+        phTree.GetNodeParInfo(nidsList, nparsList);
+        //cout << "nidsList: ";
+        //DumpIntVec( nidsList );
+        //cout << "nparsList" << ": ";
+        //DumpIntVec(nparsList);
+        //phTree.GetNodeParInfoNew(nidsList, nparsList);
+        //phTree.GetNodeParInfo(nidsList, nparsList);
+        //if( nLvs <= 0 )
+        //{
+        // YW: 09072010, ASSUME the tree is binary tree: a binary tree with V vertices has (V + 1) / 2 leaves
+        nLvs = (phTree.GetNumVertices() + 1) / 2;
+        //cout << "nlvs = " << nLvs << endl;
+        //}
+        MarginalTree tree;
+        InitMarginalTree(tree, nLvs, nidsList, nparsList);
+        //cout << "After init, mtree = ";
+        //tree.Dump();
+        // YW: 01/30/12, sort the leaf first
+        tree.SortByLeafId();
+        //cout << "After sorting, tree = ";
+        //tree.Dump();
+
+        //cout << "Initialize a tree: ";
+        //tree.Dump();
+        treeList.push_back(tree);
+        //cout << "Newick format of this marginal tree: ";
+        //cout << tree.GetNewick() << endl;
+    }
     return fNoChange;
-  }
-  // cout << "newick tree = " << strNewick << endl;
+}
 
-  // make sure leave num is correct
-  if (numLeaves < 0) {
-    //
-    multiset<string> setLabels;
-    NewickUtils ::RetrieveLabelSet(strNewick, setLabels);
-    numLeaves = setLabels.size();
-    // cout << "Set number of leaves of marginal tree to: " << numLeaves <<
-    // endl;
-  }
-
-  int nLvs = numLeaves;
-
-  // assume binary tree for now
-  int numTotNodes = 2 * nLvs - 1;
-  // init Marginal tree for now
-  vector<int> trLbls, trPos;
-  vector<double> trDist;
-  for (int i = 0; i < numTotNodes; ++i) {
-    trLbls.push_back(i);
-    trPos.push_back(-1);
-    trDist.push_back(0.0);
-  }
-  treeOut.SetNumLeaves(nLvs);
-  treeOut.SetLabelList(trLbls);
-  treeOut.SetParList(trPos);
-  treeOut.SetBranchLenList(trDist);
-  // InitMarginalTree(treeOut, nLvs, trLbls, trPos);
-
-  // now update tree
-  int leafNext = 0;
-  int nodeIntNext = numTotNodes - 1;
-  string strNewickUse = strNewick;
-  UpdateMTreeWithNWString(treeOut, leafNext, nodeIntNext, strNewickUse,
-                          pTMapper);
-  // cout << "Immediate after UpdateMTreeWithNWString: treeOut: \n";
-  // treeOut.Dump();
-
-  // finally prepare marginal tree for query
-  treeOut.BuildDescendantInfo();
-  // cout << "ReadinMarginalTreesNewickWLenString: newick string = \n" <<
-  // treeOut.GetNewick() << endl;
-
-  if (pTMapper != NULL) {
-    pTMapper->SetInitialized(true);
-  }
-
-  return fNoChange;
-}
-
-bool ReadinMarginalTreesNewickWLen(ifstream &inFile, int numLeaves,
-                                   vector<MarginalTree> &treeList,
-                                   TaxaMapper *pTMapper) {
-  // YW_ASSERT_INFO(pTMapper != NULL, "Stop here");
-  // NOTE: RETURN TRUE IF NO LABEL ADJUSTMENT IS DONE
-  // RETURN FALSE IF WE SWITCHED LABEL BY DECREASING BY ONE
-  // figure out leave num
-  bool fNoChange = true;
-  // int nLvs = numLeaves;
-
-  // read marginal trees in newick format
-  // here there is no preamble, one line per tree
-  while (inFile.eof() == false) {
-    string treeNewick;
-    inFile >> treeNewick;
-    if (treeNewick.size() == 0) {
-      break;
-    }
-    MarginalTree tree;
-    bool fres = ReadinMarginalTreesNewickWLenString(treeNewick, numLeaves, tree,
-                                                    true, pTMapper);
-    if (fres == false) {
-      fNoChange = false;
-    }
-    if (pTMapper != NULL) {
-      pTMapper->SetInitialized(true);
-    }
-
-    // cout << "Initialize a tree: ";
-    // tree.Dump();
-    treeList.push_back(tree);
-  }
-  return fNoChange;
-}
-
-void AddRootAsLeafToTree(MarginalTree &tree1, bool fIdNonNeg) {
-  // cout << "AddRootAsLeafToTree: tree1 = \n";
-  // tree1.Dump();
-  // we now add the root to the tree as a special leaf
-  vector<int> nodesIdNew, nodesParsNew;
-  for (int i = 0; i < tree1.GetNumLeaves(); ++i) {
-    nodesIdNew.push_back(tree1.listNodeLabels[i]);
-    nodesParsNew.push_back(tree1.GetParent(i) + 1);
-  }
-  // add the new special leaf
-  int idLeafNew = -2; // -2 is the default unique id for this speical node
-  int idNewStart = 3 * tree1.GetNumLeaves() + 1;
-  if (fIdNonNeg == true) {
-    // use  continuous id
-    idLeafNew = tree1.GetNumLeaves();
-  }
-  nodesIdNew.push_back(idLeafNew);
-  nodesParsNew.push_back(tree1.GetTotNodesNum() + 1);
-  // add the rest
-  for (int i = tree1.GetNumLeaves(); i < tree1.GetTotNodesNum(); ++i) {
-    nodesIdNew.push_back(tree1.listNodeLabels[i]);
-    int oldpar = tree1.GetParent(i);
-    if (oldpar < 0) {
-      // this is the old root
-      nodesParsNew.push_back(tree1.GetTotNodesNum() + 1);
-    } else {
-      nodesParsNew.push_back(oldpar + 1);
-    }
-  }
-  // finally the new root
-  int idRootId = -3; // -3 is the unique id for this speical root
-  if (fIdNonNeg == true) {
-    // use  it
-    idRootId = ++idNewStart;
-  }
-  nodesIdNew.push_back(idRootId);
-  nodesParsNew.push_back(-1);
-
-  // finally increment the number of leaves
-  tree1.listNodeLabels = nodesIdNew;
-  tree1.listParentNodePos = nodesParsNew;
-  tree1.numLeaves++;
-  // cout << "After adding the root, now tree1 = \n";
-  // tree1.Dump();
-}
-
-void GenRandBinaryTree(int numLeaves, MarginalTree &tree1) {
-  // generate a binary marginal tree with certain number of leaves
-  // we do this by random pick two active nodes (a leave without assiging
-  // parents)
-  tree1.Clear();
-  tree1.numLeaves = numLeaves;
-
-  // first add a list of leaves
-  set<int> activeNodes;
-  for (int i = 0; i < numLeaves; ++i) {
-    tree1.listNodeLabels.push_back(i);
-    tree1.listParentNodePos.push_back(-1); // for now, set to -1
-                                           // (un-initialized)
-    tree1.listEdgeDist.push_back(0.0);
-    activeNodes.insert(i);
-  }
-
-  // now start to setup new internal nodes (and assign parents)
-  while (activeNodes.size() >= 2) {
-    // cout << "activeNodes = ";
-    // DumpIntSet( activeNodes );
-
-    // uniformly pick two nodes
-    int node1 = GetRandItemInSet(activeNodes);
-    activeNodes.erase(node1);
-    int node2 = GetRandItemInSet(activeNodes);
-    activeNodes.erase(node2);
-    // cout << "Select node1 = " << node1 << ", node2 = " << node2 << endl;
-    // now create a new node
-    int nodeNew = tree1.listNodeLabels.size();
-    tree1.listNodeLabels.push_back(nodeNew);
-    tree1.listParentNodePos.push_back(-1); // for now, set to -1
-                                           // (un-initialized)
-    tree1.listEdgeDist.push_back(0.0);
-    activeNodes.insert(nodeNew);
-    // cout << "nodeNew = " << nodeNew << endl;
-    // setup parent of two children to it
-    tree1.SetParent(node1, nodeNew);
-    tree1.SetParent(node2, nodeNew);
-  }
-}
-
-void GenRandBinaryTreeClock(int numLeaves, double totHt, MarginalTree &tree1) {
-  // generate a binary marginal tree with certain number of leaves and have
-  // clock property we do this by random pick two active nodes (a leave without
-  // assiging parents)
-  map<int, double> mapNodeHeights;
-
-  tree1.Clear();
-  tree1.numLeaves = numLeaves;
-
-  // first add a list of leaves
-  set<int> activeNodes;
-  for (int i = 0; i < numLeaves; ++i) {
-    tree1.listNodeLabels.push_back(i);
-    tree1.listParentNodePos.push_back(-1); // for now, set to -1
-                                           // (un-initialized)
-    tree1.listEdgeDist.push_back(0.0);
-    activeNodes.insert(i);
-    mapNodeHeights.insert(map<int, double>::value_type(i, 0.0));
-  }
-
-  // now start to setup new internal nodes (and assign parents)
-  while (activeNodes.size() >= 2) {
-    // cout << "activeNodes = ";
-    // DumpIntSet( activeNodes );
-
-    // uniformly pick two nodes
-    int node1 = GetRandItemInSet(activeNodes);
-    activeNodes.erase(node1);
-    int node2 = GetRandItemInSet(activeNodes);
-    activeNodes.erase(node2);
-    // cout << "Select node1 = " << node1 << ", node2 = " << node2 << endl;
-    // now create a new node
-    int nodeNew = tree1.listNodeLabels.size();
-    tree1.listNodeLabels.push_back(nodeNew);
-    tree1.listParentNodePos.push_back(-1); // for now, set to -1
-                                           // (un-initialized)
-    tree1.listEdgeDist.push_back(0.0);
-    activeNodes.insert(nodeNew);
-    double htNodeCur =
-        totHt * (numLeaves - (double)activeNodes.size()) / (numLeaves - 1);
-    // cout << "Node: " << nodeNew << ", ht = " << htNodeCur << endl;
-    // set branches
-    mapNodeHeights.insert(map<int, double>::value_type(nodeNew, htNodeCur));
-    YW_ASSERT_INFO(mapNodeHeights.find(node1) != mapNodeHeights.end(),
-                   "Not found");
-    YW_ASSERT_INFO(node1 < (int)tree1.listEdgeDist.size(), "Wrong");
-    tree1.listEdgeDist[node1] = htNodeCur - mapNodeHeights[node1];
-    // cout << "Setting edge " << node1 << " to " <<  tree1.listEdgeDist[node1]
-    // << endl;
-    YW_ASSERT_INFO(tree1.listEdgeDist[node1] >= 0.0, "Negative");
-    YW_ASSERT_INFO(mapNodeHeights.find(node2) != mapNodeHeights.end(),
-                   "Not found");
-    YW_ASSERT_INFO(node2 < (int)tree1.listEdgeDist.size(), "Wrong");
-    tree1.listEdgeDist[node2] = htNodeCur - mapNodeHeights[node2];
-    YW_ASSERT_INFO(tree1.listEdgeDist[node2] >= 0.0, "Negative");
-    // cout << "Setting edge " << node2 << " to " <<  tree1.listEdgeDist[node2]
-    // << endl; cout << "nodeNew = " << nodeNew << endl;
-    // setup parent of two children to it
-    tree1.SetParent(node1, nodeNew, false);
-    tree1.SetParent(node2, nodeNew, false);
-  }
-
-  // cout << "Edge dist list: ";
-  // DumpDoubleVec(tree1.listEdgeDist);
+bool ReadinMarginalTreesNewickWLenString(const string &strNewick, int numLeaves, MarginalTree &treeOut, bool fStartFromZero, TaxaMapper *pTMapper)
+{
+    // Builds treeOut (parents, labels and branch lengths) from one Newick string; an empty string leaves treeOut untouched.
+    // mark the change
+    bool fNoChange = true;
+    // NOTE: RETURN TRUE IF NO LABEL ADJUSTMENT IS DONE
+    // RETURN FALSE IF WE SWITCHED LABEL BY DECREASING BY ONE
+    // NOTE(review): fNoChange is never cleared in this function, so it always returns true; fStartFromZero is unused
+
+    if (strNewick.size() == 0)
+    {
+        return fNoChange;
+    }
+    //cout << "newick tree = " << strNewick << endl;
+
+    // a negative numLeaves means "unknown": count the labels found in the string instead
+    if (numLeaves < 0)
+    {
+        //
+        multiset<string> setLabels;
+        NewickUtils ::RetrieveLabelSet(strNewick, setLabels);
+        numLeaves = setLabels.size();
+        //cout << "Set number of leaves of marginal tree to: " << numLeaves << endl;
+    }
+
+    int nLvs = numLeaves;
+
+    // assume binary tree for now: nLvs leaves give 2 * nLvs - 1 nodes in total
+    int numTotNodes = 2 * nLvs - 1;
+    // placeholder tree: labels 0..numTotNodes-1, every parent -1, every branch length 0.0
+    vector<int> trLbls, trPos;
+    vector<double> trDist;
+    for (int i = 0; i < numTotNodes; ++i)
+    {
+        trLbls.push_back(i);
+        trPos.push_back(-1);
+        trDist.push_back(0.0);
+    }
+    treeOut.SetNumLeaves(nLvs);
+    treeOut.SetLabelList(trLbls);
+    treeOut.SetParList(trPos);
+    treeOut.SetBranchLenList(trDist);
+    //InitMarginalTree(treeOut, nLvs, trLbls, trPos);
+
+    // now update tree: leaf indices are handed out upward from 0, internal-node indices downward from the last index
+    int leafNext = 0;
+    int nodeIntNext = numTotNodes - 1;
+    string strNewickUse = strNewick;
+    UpdateMTreeWithNWString(treeOut, leafNext, nodeIntNext, strNewickUse, pTMapper);
+    //cout << "Immediate after UpdateMTreeWithNWString: treeOut: \n";
+    //treeOut.Dump();
+
+    // finally prepare marginal tree for query
+    treeOut.BuildDescendantInfo();
+    //cout << "ReadinMarginalTreesNewickWLenString: newick string = \n" << treeOut.GetNewick() << endl;
+
+    if (pTMapper != NULL)
+    {
+        pTMapper->SetInitialized(true);
+    }
+
+    return fNoChange;
+}
+
+bool ReadinMarginalTreesNewickWLen(ifstream &inFile, int numLeaves, vector<MarginalTree> &treeList, TaxaMapper *pTMapper)
+{
+    // Reads Newick-with-length trees from inFile until EOF or an empty read, appending one MarginalTree each to treeList.
+    // NOTE: RETURN TRUE IF NO LABEL ADJUSTMENT IS DONE
+    // RETURN FALSE IF WE SWITCHED LABEL BY DECREASING BY ONE
+    // treeList is appended to, not cleared
+    bool fNoChange = true;
+    //int nLvs = numLeaves;
+
+    // there is no preamble; operator>> splits on whitespace, so each whitespace-delimited token is
+    // parsed as one tree
+    while (inFile.eof() == false)
+    {
+        string treeNewick;
+        inFile >> treeNewick;
+        if (treeNewick.size() == 0)
+        {
+            break;
+        }
+        MarginalTree tree;
+        bool fres = ReadinMarginalTreesNewickWLenString(treeNewick, numLeaves, tree, true, pTMapper);
+        if (fres == false)
+        {
+            fNoChange = false;
+        }
+        if (pTMapper != NULL)
+        {
+            pTMapper->SetInitialized(true);
+        }
+
+        //cout << "Initialize a tree: ";
+        //tree.Dump();
+        treeList.push_back(tree);
+    }
+    return fNoChange;
+}
+
+void AddRootAsLeafToTree(MarginalTree &tree1, bool fIdNonNeg)
+{
+    // Rebuilds tree1's label and parent lists with two extra nodes: a special leaf inserted right after the
+    // existing leaves, and a new root appended last; the old root and the special leaf both get the new root as parent.
+    // Existing parent positions are shifted by +1 because the inserted leaf pushes later nodes back by one.
+    vector<int> nodesIdNew, nodesParsNew;
+    for (int i = 0; i < tree1.GetNumLeaves(); ++i)
+    {
+        nodesIdNew.push_back(tree1.listNodeLabels[i]);
+        nodesParsNew.push_back(tree1.GetParent(i) + 1);
+    }
+    // add the new special leaf; its parent is the new root, which will sit at position GetTotNodesNum() + 1
+    int idLeafNew = -2; // -2 is the default unique id for this special node
+    int idNewStart = 3 * tree1.GetNumLeaves() + 1;
+    if (fIdNonNeg == true)
+    {
+        // use the next consecutive leaf id (the current leaf count)
+        idLeafNew = tree1.GetNumLeaves();
+    }
+    nodesIdNew.push_back(idLeafNew);
+    nodesParsNew.push_back(tree1.GetTotNodesNum() + 1);
+    // add the rest (the non-leaf nodes)
+    for (int i = tree1.GetNumLeaves(); i < tree1.GetTotNodesNum(); ++i)
+    {
+        nodesIdNew.push_back(tree1.listNodeLabels[i]);
+        int oldpar = tree1.GetParent(i);
+        if (oldpar < 0)
+        {
+            // this is the old root: hang it under the new root
+            nodesParsNew.push_back(tree1.GetTotNodesNum() + 1);
+        }
+        else
+        {
+            nodesParsNew.push_back(oldpar + 1);
+        }
+    }
+    // finally the new root
+    int idRootId = -3; // -3 is the unique id for this special root
+    if (fIdNonNeg == true)
+    {
+        // non-negative variant: the root id becomes 3 * numLeaves + 2 (numLeaves as on entry)
+        idRootId = ++idNewStart;
+    }
+    nodesIdNew.push_back(idRootId);
+    nodesParsNew.push_back(-1);
+
+    // install the new lists and count the extra leaf; NOTE(review): listEdgeDist is not extended here
+    tree1.listNodeLabels = nodesIdNew;
+    tree1.listParentNodePos = nodesParsNew;
+    tree1.numLeaves++;
+    //cout << "After adding the root, now tree1 = \n";
+    //tree1.Dump();
+}
+
+void GenRandBinaryTree(int numLeaves, MarginalTree &tree1)
+{
+    // Builds a random binary tree on numLeaves leaves (tree1 is cleared first; all edge lengths are 0.0):
+    // repeatedly pick two "active" (still parentless) nodes and join them under a new node until one remains.
+    tree1.Clear();
+    tree1.numLeaves = numLeaves;
+
+    // first add a list of leaves, labelled 0..numLeaves-1; all of them start out active
+    set<int> activeNodes;
+    for (int i = 0; i < numLeaves; ++i)
+    {
+        tree1.listNodeLabels.push_back(i);
+        tree1.listParentNodePos.push_back(-1); // for now, set to -1 (un-initialized)
+        tree1.listEdgeDist.push_back(0.0);
+        activeNodes.insert(i);
+    }
+
+    // now start to setup new internal nodes (and assign parents)
+    while (activeNodes.size() >= 2)
+    {
+        //cout << "activeNodes = ";
+        //DumpIntSet( activeNodes );
+
+        // pick two distinct active nodes (the first is erased before the second is drawn)
+        int node1 = GetRandItemInSet(activeNodes);
+        activeNodes.erase(node1);
+        int node2 = GetRandItemInSet(activeNodes);
+        activeNodes.erase(node2);
+        //cout << "Select node1 = " << node1 << ", node2 = " << node2 << endl;
+        // now create a new node; its label equals its position in the lists
+        int nodeNew = tree1.listNodeLabels.size();
+        tree1.listNodeLabels.push_back(nodeNew);
+        tree1.listParentNodePos.push_back(-1); // for now, set to -1 (un-initialized)
+        tree1.listEdgeDist.push_back(0.0);
+        activeNodes.insert(nodeNew);
+        //cout << "nodeNew = " << nodeNew << endl;
+        // setup parent of two children to it
+        tree1.SetParent(node1, nodeNew);
+        tree1.SetParent(node2, nodeNew);
+    }
+}
+
+void GenRandBinaryTreeClock(int numLeaves, double totHt, MarginalTree &tree1)
+{
+    // Like GenRandBinaryTree, but also assigns branch lengths so that every node has a height:
+    // leaves are at height 0 and each merge creates a node one step higher, the last one at totHt.
+    map<int, double> mapNodeHeights;
+
+    tree1.Clear();
+    tree1.numLeaves = numLeaves;
+
+    // first add a list of leaves, labelled 0..numLeaves-1, each at height 0.0
+    set<int> activeNodes;
+    for (int i = 0; i < numLeaves; ++i)
+    {
+        tree1.listNodeLabels.push_back(i);
+        tree1.listParentNodePos.push_back(-1); // for now, set to -1 (un-initialized)
+        tree1.listEdgeDist.push_back(0.0);
+        activeNodes.insert(i);
+        mapNodeHeights.insert(map<int, double>::value_type(i, 0.0));
+    }
+
+    // now start to setup new internal nodes (and assign parents)
+    while (activeNodes.size() >= 2)
+    {
+        //cout << "activeNodes = ";
+        //DumpIntSet( activeNodes );
+
+        // pick two distinct active (still parentless) nodes; the first is erased before the second is drawn
+        int node1 = GetRandItemInSet(activeNodes);
+        activeNodes.erase(node1);
+        int node2 = GetRandItemInSet(activeNodes);
+        activeNodes.erase(node2);
+        //cout << "Select node1 = " << node1 << ", node2 = " << node2 << endl;
+        // now create a new node; its label equals its position in the lists
+        int nodeNew = tree1.listNodeLabels.size();
+        tree1.listNodeLabels.push_back(nodeNew);
+        tree1.listParentNodePos.push_back(-1); // for now, set to -1 (un-initialized)
+        tree1.listEdgeDist.push_back(0.0);
+        activeNodes.insert(nodeNew);
+        double htNodeCur = totHt * (numLeaves - (double)activeNodes.size()) / (numLeaves - 1);
+        // after the k-th merge numLeaves - k nodes are active, so the new node's height is totHt * k / (numLeaves - 1)
+        // each child's branch length is the new node's height minus the child's own height
+        mapNodeHeights.insert(map<int, double>::value_type(nodeNew, htNodeCur));
+        YW_ASSERT_INFO(mapNodeHeights.find(node1) != mapNodeHeights.end(), "Not found");
+        YW_ASSERT_INFO(node1 < (int)tree1.listEdgeDist.size(), "Wrong");
+        tree1.listEdgeDist[node1] = htNodeCur - mapNodeHeights[node1];
+        //cout << "Setting edge " << node1 << " to " <<  tree1.listEdgeDist[node1] << endl;
+        YW_ASSERT_INFO(tree1.listEdgeDist[node1] >= 0.0, "Negative");
+        YW_ASSERT_INFO(mapNodeHeights.find(node2) != mapNodeHeights.end(), "Not found");
+        YW_ASSERT_INFO(node2 < (int)tree1.listEdgeDist.size(), "Wrong");
+        tree1.listEdgeDist[node2] = htNodeCur - mapNodeHeights[node2];
+        YW_ASSERT_INFO(tree1.listEdgeDist[node2] >= 0.0, "Negative");
+        //cout << "Setting edge " << node2 << " to " <<  tree1.listEdgeDist[node2] << endl;
+        //cout << "nodeNew = " << nodeNew << endl;
+        // setup parent of two children to it
+        tree1.SetParent(node1, nodeNew, false);
+        tree1.SetParent(node2, nodeNew, false);
+    }
+
+    //cout << "Edge dist list: ";
+    //DumpDoubleVec(tree1.listEdgeDist);
 }
 
 // find a chain with specified length
-static bool FindChainAtNodeInTree(const MarginalTree &tree1, int nodeHead,
-                                  int lenChain, vector<int> &leaves,
-                                  vector<int> &leaves2) {
-  leaves.clear();
-  leaves2.clear();
-
-  int curn = nodeHead;
-  for (int i = 0; i < lenChain; ++i) {
-    // cout << "curn = " << curn << endl;
-    bool fLeftLeave = tree1.IsLeaf(tree1.GetLeftDescendant(curn));
-    bool fRightLeave = tree1.IsLeaf(tree1.GetRightDescendant(curn));
-    if ((fLeftLeave == true && fRightLeave == true && i < lenChain - 2) ||
-        (fLeftLeave == false && fRightLeave == false)) {
-      // cout << "Fail\n";
-      return false;
-    }
-    // now move down
-    if (fLeftLeave == false) {
-      // put right to store
-      int child = tree1.GetRightDescendant(curn);
-      YW_ASSERT_INFO(tree1.IsLeaf(child), "Not a leaf");
-      leaves.push_back(child);
-      curn = tree1.GetLeftDescendant(curn);
-    } else if (fRightLeave == false) {
-      int child = tree1.GetLeftDescendant(curn);
-      YW_ASSERT_INFO(tree1.IsLeaf(child), "Not a leaf");
-      leaves.push_back(child);
-      curn = tree1.GetRightDescendant(curn);
-    } else {
-      YW_ASSERT_INFO(i >= lenChain - 2, "wrong1");
-      // YW_ASSERT_INFO( tree1.IsLeaf( curn ) == false, " a leaf" );
-      // in this case, we just save itself
-      if (i == lenChain - 2) {
-        leaves2 = leaves;
-        leaves.push_back(tree1.GetLeftDescendant(curn));
-        leaves.push_back(tree1.GetRightDescendant(curn));
-        leaves2.push_back(tree1.GetRightDescendant(curn));
-        leaves2.push_back(tree1.GetLeftDescendant(curn));
-        break;
-      } else {
-        // only save one, try both possibilities
-        leaves2 = leaves;
-        leaves.push_back(tree1.GetLeftDescendant(curn));
-        // save leaves2
-        leaves2.push_back(tree1.GetRightDescendant(curn));
-        break;
-      }
-    }
-  }
-  return true;
+static bool FindChainAtNodeInTree(const MarginalTree &tree1, int nodeHead, int lenChain, vector<int> &leaves,
+                                  vector<int> &leaves2)
+{
+    leaves.clear();
+    leaves2.clear();
+    // Walk down from nodeHead for at most lenChain levels, collecting the leaf child met at each level.
+    int curn = nodeHead;
+    for (int i = 0; i < lenChain; ++i)
+    {
+        // classify both children of the current node
+        bool fLeftLeave = tree1.IsLeaf(tree1.GetLeftDescendant(curn));
+        bool fRightLeave = tree1.IsLeaf(tree1.GetRightDescendant(curn));
+        if ((fLeftLeave == true && fRightLeave == true && i < lenChain - 2) || (fLeftLeave == false && fRightLeave == false))
+        {
+            // no chain here: two leaf children too early (i < lenChain - 2), or no leaf child at all
+            return false;
+        }
+        // now move down
+        if (fLeftLeave == false)
+        {
+            // left child is internal: store the right (leaf) child and descend to the left
+            int child = tree1.GetRightDescendant(curn);
+            YW_ASSERT_INFO(tree1.IsLeaf(child), "Not a leaf");
+            leaves.push_back(child);
+            curn = tree1.GetLeftDescendant(curn);
+        }
+        else if (fRightLeave == false)
+        {
+            int child = tree1.GetLeftDescendant(curn);
+            YW_ASSERT_INFO(tree1.IsLeaf(child), "Not a leaf");
+            leaves.push_back(child);
+            curn = tree1.GetRightDescendant(curn);
+        }
+        else
+        {
+            YW_ASSERT_INFO(i >= lenChain - 2, "wrong1");
+            // both children are leaves, so the walk ends at this node;
+            // leaves2 is only filled in this branch and holds the alternative ending of the chain
+            if (i == lenChain - 2)
+            {
+                leaves2 = leaves;
+                leaves.push_back(tree1.GetLeftDescendant(curn));
+                leaves.push_back(tree1.GetRightDescendant(curn));
+                leaves2.push_back(tree1.GetRightDescendant(curn));
+                leaves2.push_back(tree1.GetLeftDescendant(curn));
+                break;
+            }
+            else
+            {
+                // here i == lenChain - 1: only save one leaf, try both possibilities
+                leaves2 = leaves;
+                leaves.push_back(tree1.GetLeftDescendant(curn));
+                // save leaves2
+                leaves2.push_back(tree1.GetRightDescendant(curn));
+                break;
+            }
+        }
+    }
+    return true;
 }
 
 // here, we simply found chain of fixed length (i.e. 4)
-void FindChainsInTree(const MarginalTree &tree1,
-                      map<vector<int>, int> &foundChains) {
-  //
-  foundChains.clear();
+void FindChainsInTree(const MarginalTree &tree1, map<vector<int>, int> &foundChains)
+{
+    // foundChains (output, cleared first): ordered list of chain leaves ==> node at which the chain was found
+    foundChains.clear();
+
+    // we simply enumerate all internal nodes and trace down from each of them
+    for (int nn = tree1.GetNumLeaves(); nn < tree1.GetTotNodesNum(); ++nn)
+    {
+        // is this nn a chain-head of 4 leaves on one side?
+        vector<int> listLeaves, listLeaves2;
+        if (FindChainAtNodeInTree(tree1, nn, 4, listLeaves, listLeaves2) == true)
+        {
+            // record the chain; map::insert leaves an existing entry untouched if the same
+            // leaf list was already stored
+            foundChains.insert(map<vector<int>, int>::value_type(listLeaves, nn));
+
+            if (listLeaves2.size() > 0)
+            {
+                // the search also produced an alternative leaf list for this node
+                // (non-empty only when the walk ended on a node with two leaf children)
+                foundChains.insert(map<vector<int>, int>::value_type(listLeaves2, nn));
+            }
+        }
+    }
+}
+
+// construct a marginal tree from nodes and parent info
+// NOTE: this function does not take distance. Therefore, we arbitrarily assign nodes to their respective
+// heights and thus also assign branch lengths.
+// ALSO NOTE: leaves sit at height 0 and the k-th non-leaf node (in index order) at k / (number of non-leaf nodes).
+void InitMarginalTree(MarginalTree &mTree, int numLeaves, const vector<int> &listLabels, const vector<int> &listParentNodePos)
+{
+    // mTree: tree to initialize; its leaf count, labels, parent positions, edge lengths and
+    //   descendant lists are all overwritten here
+    // numLeaves: number of leaves; node indices below numLeaves are treated as leaves
+    // listLabels: one label per node (its size is used as the total number of nodes)
+    // listParentNodePos: parent index of each node; every node except the last must have a parent
+    //   index >= numLeaves, and a non-leaf node's parent must come after it (both asserted below)
+    // The last node gets edge length 0 (it is treated as the root).
+    mTree.numLeaves = numLeaves;
+    mTree.listNodeLabels = listLabels;
+    mTree.listParentNodePos = listParentNodePos;
+
+    // now init edge dist
+    mTree.listEdgeDist.clear();
+    int numNonLeafNodes = listLabels.size() - numLeaves;
+    double unitLen = 1.0 / numNonLeafNodes;
+    for (int i = 0; i < (int)listLabels.size() - 1; ++i)
+    {
+        int parPos = listParentNodePos[i] - numLeaves + 1;
+        // parPos is the 1-based rank of the parent among the non-leaf nodes,
+        // i.e. the parent's height in units of unitLen
+        YW_ASSERT_INFO(parPos > 0, "Fatal error in InitMarginalTree");
+        if (i < numLeaves)
+        {
+            // leaf
+            mTree.listEdgeDist.push_back(parPos * unitLen);
+        }
+        else
+        {
+            // need to subtract current pos
+            int curpos = i - numLeaves + 1;
+            // a non-leaf node must be strictly below its parent
+            YW_ASSERT_INFO(curpos < parPos, "Trouble in InitMarginalTree");
+            mTree.listEdgeDist.push_back((parPos - curpos) * unitLen);
+        }
+    }
+    // the root has length-0 by default
+    mTree.listEdgeDist.push_back(0.0);
+    // also build up descendents
+    mTree.BuildDescendantInfo();
+}
+
+// find the neighborhood of marginal trees within one NNI operation away (incl. the current tree)
+void FindOneNNIMTreesFrom(MarginalTree &mTreeSrc, vector<MarginalTree> &listNNITrees, vector<pair<int, int>> *pListPairEdgesSwapped)
+{
+    // listNNITrees is rebuilt from scratch; pListPairEdgesSwapped (may be NULL) is only appended to
+    listNNITrees.clear();
+
+    // process each internal node of the mtree that has at least one non-leaf child
+    for (int node = mTreeSrc.GetNumLeaves(); node < mTreeSrc.GetTotNodesNum(); ++node)
+    {
+        // the two children of this node
+        int nodeLeft = mTreeSrc.GetLeftDescendant(node);
+        int nodeRight = mTreeSrc.GetRightDescendant(node);
+        if (mTreeSrc.IsLeaf(nodeLeft) == true && mTreeSrc.IsLeaf(nodeRight) == true)
+        {
+            // skip if both children are leaves since in this case swapping has no effect
+            continue;
+        }
+        // now swap its two children's subtrees in up to four ways (2 children x 2 grandchildren)
+        int nodesProc1[2], nodesProc2[2];
+        nodesProc1[0] = nodeLeft;
+        nodesProc1[1] = nodeRight;
+        nodesProc2[1] = nodeLeft;
+        nodesProc2[0] = nodeRight;
+        for (int ii = 0; ii < 2; ++ii)
+        {
+            int n1Proc = nodesProc1[ii];
+            int n2Proc = nodesProc2[ii];
+            if (mTreeSrc.IsLeaf(n1Proc) == false)
+            {
+                int node1Left = mTreeSrc.GetLeftDescendant(n1Proc);
+                int node1Right = mTreeSrc.GetRightDescendant(n1Proc);
+                YW_ASSERT_INFO(node1Left >= 0 && node1Right >= 0, "Can not miss");
+
+                // two choices to swap: n2Proc with one of the descendents
+                int nodesProc1Child[2];
+                nodesProc1Child[0] = node1Left;
+                nodesProc1Child[1] = node1Right;
+                for (int jj = 0; jj < 2; ++jj)
+                {
+                    MarginalTree mtreeNNI1 = mTreeSrc;
+                    mtreeNNI1.SwapBranches(nodesProc1Child[jj], n2Proc);
+                    mtreeNNI1.BuildDescendantInfo();
+                    // SwapBranches only rewires the two parents, hence the rebuild above;
+                    // descendant info is rebuilt again after each reordering step below
+                    mtreeNNI1.RearrangeParIncOrder();
+                    //cout << "Found a new mtreeNNI1: " << mtreeNNI1.GetNewick() << endl;
+                    //mtreeNNI1.Dump();
+                    mtreeNNI1.BuildDescendantInfo();
+                    // sort by leaf id: YW: Feb 19,2016
+                    mtreeNNI1.SortByLeafId();
+                    mtreeNNI1.BuildDescendantInfo();
+                    listNNITrees.push_back(mtreeNNI1);
+                    // optionally record which two branches (node indices taken from mTreeSrc) were swapped
+                    if (pListPairEdgesSwapped != NULL)
+                    {
+                        pair<int, int> pp(nodesProc1Child[jj], n2Proc);
+                        pListPairEdgesSwapped->push_back(pp);
+                    }
+                    //cout << "After descendent rebult, " << mtreeNNI1.GetNewick() << endl;
+                    //mtreeNNI1.Dump();
+                }
+            }
+        }
+    }
+    // finally add self (note: no matching entry is appended to pListPairEdgesSwapped)
+    listNNITrees.push_back(mTreeSrc);
+    //exit(1);
+}
+
+void CreateSubtreeFromLeaves(MarginalTree &mTreeOrig, const set<int> &setLeafLabels, MarginalTree &mTreeSub, map<int, int> &mapNewNodeToOldNode)
+{
+    // Build in mTreeSub the subtree of mTreeOrig spanned by the leaves whose labels are in setLeafLabels.
+    // mapNewNodeToOldNode (output): node index in mTreeSub ==> node index in mTreeOrig
+
+    // each distinct non-empty set of requested labels found below some original node becomes one
+    // subtree node; the key is (set size, set), so map order lists smaller sets first
+    map<pair<int, set<int>>, int> mapShrunkLeavesWithNum;
+
+    // get all the clades
+    for (int i = 0; i < mTreeOrig.GetTotNodesNum(); ++i)
+    {
+        // labels of the leaves under node i, then (presumably -- confirm JoinSets) their intersection with setLeafLabels
+        set<int> setGetDesc;
+        mTreeOrig.GetLeavesUnder(i, setGetDesc);
+        set<int> setGetDescLbls;
+        for (set<int>::iterator it = setGetDesc.begin(); it != setGetDesc.end(); ++it)
+        {
+            int lbl = mTreeOrig.GetLabel(*it);
+            setGetDescLbls.insert(lbl);
+        }
+        set<int> sIntsect;
+        JoinSets(setGetDescLbls, setLeafLabels, sIntsect);
+
+        // ignore empty nodes
+        if (sIntsect.size() <= 0)
+        {
+            // none of the requested labels is below this node
+            continue;
+        }
+
+        // save it
+        pair<int, set<int>> ss(sIntsect.size(), sIntsect);
+        if (mapShrunkLeavesWithNum.find(ss) == mapShrunkLeavesWithNum.end())
+        {
+            mapShrunkLeavesWithNum.insert(map<pair<int, set<int>>, int>::value_type(ss, i));
+        }
+        else
+        {
+            // save the lower (smaller)
+            if (mapShrunkLeavesWithNum[ss] > i)
+            {
+                mapShrunkLeavesWithNum[ss] = i;
+            }
+        }
+    }
+#if 0
+cout << "mapShrunkLeavesWithNum: ";
+for( map< pair<int,set<int> >, int > :: iterator it = mapShrunkLeavesWithNum.begin(); it != mapShrunkLeavesWithNum.end(); ++it )
+{
+cout << "Size: " << it->first.first << ", orig. node = " << it->second << ", set of leaves: ";
+DumpIntSet( it->first.second);
+}
+#endif
+
+    // set up the old and new node position map
+    map<int, int> mapNewToOldPos, mapOldToNewPos;
+    set<int> setNewParsPosOld;
+    int index = 0;
+    for (map<pair<int, set<int>>, int>::iterator it = mapShrunkLeavesWithNum.begin(); it != mapShrunkLeavesWithNum.end(); ++it, ++index)
+    {
+        // the new node index is the rank of the entry in map order
+        mapNewToOldPos.insert(map<int, int>::value_type(index, it->second));
+        mapOldToNewPos.insert(map<int, int>::value_type(it->second, index));
 
-  // we simply enumerate all internal node and trace down from it
-  for (int nn = tree1.GetNumLeaves(); nn < tree1.GetTotNodesNum(); ++nn) {
-    // is this nn a chain-hnead of 4 leaves on one side?
-    vector<int> listLeaves, listLeaves2;
-    if (FindChainAtNodeInTree(tree1, nn, 4, listLeaves, listLeaves2) == true) {
-      // cout << "Found one chain at node nn = " << nn << ", with leaves = ";
-      // DumpIntVec( listLeaves);
-      foundChains.insert(map<vector<int>, int>::value_type(listLeaves, nn));
+        setNewParsPosOld.insert(it->second);
+    }
 
-      if (listLeaves2.size() > 0) {
-        // cout << "Found one chain at node nn = " << nn << ", with leaves = ";
-        // DumpIntVec( listLeaves2);
-        foundChains.insert(map<vector<int>, int>::value_type(listLeaves2, nn));
-      }
+    // now init the tree: note edge labels are ignored!
+    mTreeSub.Clear();
+    mTreeSub.SetNumLeaves(setLeafLabels.size());
+    vector<int> listLbls;
+    PopulateVecBySet(listLbls, setLeafLabels);
+    for (int i = (int)setLeafLabels.size(); i < (int)mapShrunkLeavesWithNum.size(); ++i)
+    {
+        // these are internal nodes
+        listLbls.push_back(-1);
+    }
+    mTreeSub.SetLabelList(listLbls);
+    vector<int> listParPos;
+    // now set up parent: the nearest kept ancestor in the original tree, or -1 when there is none
+    for (int i = 0; i < (int)listLbls.size(); ++i)
+    {
+        YW_ASSERT_INFO(mapNewToOldPos.find(i) != mapNewToOldPos.end(), "Fail to find2");
+        int posOrig = mapNewToOldPos[i];
+        int anc = mTreeOrig.GetFirstNonselfAnces(posOrig, setNewParsPosOld);
+        int posNewAnc = -1;
+        if (anc >= 0)
+        {
+            YW_ASSERT_INFO(mapOldToNewPos.find(anc) != mapOldToNewPos.end(), "Fail to find3");
+            posNewAnc = mapOldToNewPos[anc];
+        }
+        listParPos.push_back(posNewAnc);
+    }
+    mTreeSub.SetParList(listParPos);
+
+    // create nodes mapping
+    mapNewNodeToOldNode = mapNewToOldPos;
+
+    // rebuild the child lists from the parent list that was just set
+    mTreeSub.BuildDescendantInfo();
+
+    // YW: how do we assign branch length
+    UpdateBranchLenInSubtree(mTreeOrig, mapNewNodeToOldNode, mTreeSub);
+#if 0
+cout << "Constructed subtree: " << mTreeSub.GetNewick() << endl;
+mTreeSub.Dump();
+cout << "mapNewNodeToOldNode: ";
+for(map<int,int> :: iterator it=mapNewNodeToOldNode.begin(); it != mapNewNodeToOldNode.end(); ++it)
+{
+cout << "[" << it->first << "," << it->second << "]  ";
+}
+cout << endl;
+#endif
+}
+
+void UpdateBranchLenInSubtree(MarginalTree &mTreeOrig, map<int, int> &mapNewNodeToOldNode, MarginalTree &mTreeSub)
+{
+    // Recompute the branch lengths of mTreeSub from path lengths in mTreeOrig.
+    // mapNewNodeToOldNode: node index in mTreeSub ==> node index in mTreeOrig.
+    // For every mapped node that has a parent in mTreeSub, the new branch length is
+    // mTreeOrig.GetPathLen(old node, old node of that parent); nodes without a parent get 0.0.
+    // NOTE(review): the lengths are pushed in map (ascending new-index) order and handed to
+    // SetBranchLenList as a plain list, so this presumably relies on the map keys being exactly
+    // 0..n-1 -- confirm against the callers.
+    //
+
+    // one branch length per map entry
+    vector<double> listBrLens;
+    for (map<int, int>::iterator it = mapNewNodeToOldNode.begin(); it != mapNewNodeToOldNode.end(); ++it)
+    {
+        double distcur = 0.0;
+        // new index, matching original index, and the node's parent within the subtree
+        int pnew = it->first;
+        int pold = it->second;
+        int pnewpar = mTreeSub.GetParent(pnew);
+        if (pnewpar >= 0)
+        {
+            YW_ASSERT_INFO(mapNewNodeToOldNode.find(pnewpar) != mapNewNodeToOldNode.end(), "Fail to find");
+            int poldpar = mapNewNodeToOldNode[pnewpar];
+            distcur = mTreeOrig.GetPathLen(pold, poldpar);
+        }
+
+        listBrLens.push_back(distcur);
+    }
+    mTreeSub.SetBranchLenList(listBrLens);
+}
+
+void FindMatchedSubtrees(MarginalTree &mtreeNew, MarginalTree &mtreeRef, map<int, int> &mapSTNewToRef)
+{
+    // find the shared subtrees that are in both trees, then create a map: map the subtree index in mtreeNew to mtreeRef
+    // a subtree is identified by the set of leaf labels returned by ConsDecedentLeavesInfoLabels, and its
+    // index is its position in that list
+    // mapSTNewToRef (output, cleared first): subtrees of mtreeNew with no counterpart in mtreeRef get no entry
+    vector<set<int>> listSubtreesNew, listSubtreesRef;
+    mtreeNew.ConsDecedentLeavesInfoLabels(listSubtreesNew);
+    mtreeRef.ConsDecedentLeavesInfoLabels(listSubtreesRef);
+
+    // create fast searching: leaf-label set ==> subtree index in the reference tree
+    map<set<int>, int> mapIndexSTRef;
+    for (int i = 0; i < (int)listSubtreesRef.size(); ++i)
+    {
+        mapIndexSTRef.insert(map<set<int>, int>::value_type(listSubtreesRef[i], i));
+    }
+
+    // map only subtrees that ARE present in the reference index, so the operator[] lookup below never default-inserts
+    mapSTNewToRef.clear();
+    for (int i = 0; i < (int)listSubtreesNew.size(); ++i)
+    {
+        if (mapIndexSTRef.find(listSubtreesNew[i]) != mapIndexSTRef.end())
+        {
+            mapSTNewToRef.insert(map<int, int>::value_type(i, mapIndexSTRef[listSubtreesNew[i]]));
+        }
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Define a utility class
+
+MarginalTree ::MarginalTree() : numLeaves(0)
+{
+    // start with zero leaves; edge distances are not initialized here
+    // TBD
+}
+
+void MarginalTree ::Clear()
+{ // reset to an empty tree: zero leaves and every per-node list emptied
+    numLeaves = 0;
+    listNodeLabels.clear();
+    listParentNodePos.clear();
+    listEdgeDist.clear();
+    listLeftDescs.clear();
+    listRightDescs.clear();
+}
+
+void MarginalTree ::BuildDescendantInfo()
+{
+    // Rebuild listLeftDescs/listRightDescs from the parent list. Note, this only works for binary tree!!!!!
+    listLeftDescs.clear();
+    listRightDescs.clear();
+    int numNodes = GetTotNodesNum();
+    // one slot per node; -1 means "no child"
+    listLeftDescs.resize(numNodes);
+    listRightDescs.resize(numNodes);
+
+    // start with no children anywhere (nodes that are nobody's parent keep -1)
+    for (int i = 0; i < numNodes; ++i)
+    {
+        listLeftDescs[i] = -1;
+        listRightDescs[i] = -1;
+    }
+
+    // attach every node to its parent: the first child seen (lowest index) becomes the left one
+    for (int i = 0; i < numNodes; ++i)
+    {
+        int p = GetParent(i);
+        // nodes without a parent (p < 0) are skipped
+        if (p < 0)
+        {
+            continue;
+        }
+        // setup p's child to i
+        if (listLeftDescs[p] < 0)
+        {
+            listLeftDescs[p] = i;
+            //cout << "Set left descendent of " << p << " to be " << i << endl;
+        }
+        else
+        {
+            if (listRightDescs[p] >= 0)
+            {
+                cout << "Something wrong: the current tree:";
+                Dump();
+            }
+
+            // make sure this is binary tree (a third child trips the assertion, after the dump above)
+            YW_ASSERT_INFO(listRightDescs[p] < 0, "Not a binary tree2");
+            listRightDescs[p] = i;
+            //cout << "Set right descendent of " << p << " to be " << i << endl;
+        }
+    }
+}
+
+bool MarginalTree ::IsToplogicSame(const MarginalTree &tree) const
+{
+    // test whether this tree and 'tree' are topologically the same: equal node count, equal leaf count and the same collection of sets returned by ConsDecedentLeavesInfo
+    if (GetTotNodesNum() != tree.GetTotNodesNum())
+    {
+        //cout << "Tree node numbers are not equal\n";
+        // nodes number are different, then different
+        return false;
+    }
+    // leaf counts must agree too
+    if (GetNumLeaves() != tree.GetNumLeaves())
+    {
+        //cout << "Tree leaf numbers are not equal\n";
+        return false;
+    }
+    // NOTE: node labels and parent lists are not compared (both checks are disabled below)
+    //if( listNodeLabels != tree.listNodeLabels )
+    //{
+//cout << "Tree node ids are not equal\n";
+//        return false;
+//    }
+#if 0
+    if( listParentNodePos != tree.listParentNodePos  )
+    {
+//cout << "Tree node parents are not equal\n";
+        return false;
+    }
+#endif
+
+    // compare the two trees as sets of splits, so the order of the nodes does not matter
+    //MarginalTree t1 = *this;
+    //MarginalTree t2 = tree;
+    //t1.SortByLeafId();
+    //t2.SortByLeafId();
+    vector<set<int>> t1splits, t2splits;
+    ConsDecedentLeavesInfo(t1splits);
+    tree.ConsDecedentLeavesInfo(t2splits);
+    set<set<int>> st1splits, st2splits;
+    for (int i = 0; i < (int)t1splits.size(); ++i)
+    {
+        st1splits.insert(t1splits[i]);
+    }
+    for (int i = 0; i < (int)t2splits.size(); ++i)
+    {
+        st2splits.insert(t2splits[i]);
+    }
+
+    if (st1splits != st2splits)
+    {
+        //cout << "Tree node parents are not equal\n";
+        //cout << "** tree 1: \n";
+        //for(int i=0; i<(int)t1splits.size(); ++i )
+        //{
+        //DumpIntSet(t1splits[i]);
+        //}
+        //cout << "** tree 2: \n";
+        //for(int i=0; i<(int)t2splits.size(); ++i )
+        //{
+        //DumpIntSet(t2splits[i]);
+        //}
+        return false;
+    }
+
+    return true;
+}
+
+int MarginalTree ::GetLeftDescendant(int node) const
+{ // left child index stored for node; asserts the descendant lists cover all nodes, node itself is not range-checked
+    YW_ASSERT_INFO((int)listLeftDescs.size() == GetTotNodesNum() &&
+                       (int)listRightDescs.size() == GetTotNodesNum(),
+                   "descendant info not set");
+    return listLeftDescs[node];
+}
+int MarginalTree ::GetRightDescendant(int node) const
+{ // right child index stored for node; asserts the descendant lists cover all nodes, node itself is not range-checked
+    YW_ASSERT_INFO((int)listLeftDescs.size() == GetTotNodesNum() &&
+                       (int)listRightDescs.size() == GetTotNodesNum(),
+                   "descendant info not set");
+    return listRightDescs[node];
+}
+
+int MarginalTree ::GetFirstNonselfAnces(int v, const set<int> &setAnces) const
+{
+    // walk up from v and return the nearest proper ancestor that is in setAnces; if not found return -1
+    int res = -1;
+
+    int ncv = v;
+    while (ncv >= 0)
+    {
+        // step to the parent before testing, so v itself is never returned
+        ncv = GetParent(ncv);
+        if (setAnces.find(ncv) != setAnces.end())
+        {
+            res = ncv;
+            break;
+        }
+    }
+
+    return res;
+}
+
+void MarginalTree ::InitDefaultEdgeLen()
+{
+    listEdgeDist.clear();
+
+    // the default assumes the following:
+    // (a) all leaves are on the same level
+    // (b) the rest of tree nodes are organized by GetDefaultEdgeLen, which is called for every node but the last
+    for (int i = 0; i < GetTotNodesNum() - 1; ++i)
+    {
+        double distRel = GetDefaultEdgeLen(i);
+        listEdgeDist.push_back(distRel);
+    }
+    // the root (last node) has no edge here
+    listEdgeDist.push_back(0.0);
+}
+
+void MarginalTree ::InitUnitEdgelen()
+{
+    // discard any existing edge lengths
+    listEdgeDist.clear();
+
+    // every node except the last one gets an edge of length 1.0,
+    // regardless of where the node sits in the tree
+    // (the last node is treated as the root, see below)
+    for (int i = 0; i < GetTotNodesNum() - 1; ++i)
+    {
+        listEdgeDist.push_back(1.0);
+    }
+    // the root has no edge here
+    listEdgeDist.push_back(0.0);
+}
+
+double MarginalTree ::GetDefaultEdgeLen(int child)
+{
+    int curpos = child;
+    int parpos = listParentNodePos[child];
+    // normalized heights of the parent (punorm) and of the child (plnorm), see CalcNormHeight
+    int punorm = CalcNormHeight(parpos);
+    int plnorm = CalcNormHeight(curpos);
+    int numLeaves = GetNumLeaves();
+    // clamp both heights to numLeaves - 1 so that the denominators below stay positive
+    if (punorm >= numLeaves)
+    {
+        punorm = numLeaves - 1;
+    }
+    if (plnorm >= numLeaves)
+    {
+        plnorm = numLeaves - 1;
+    }
+    // YW: changed back to old distance, 082306, to see if this matters
+    double res = 2.0 * (1.0 / (numLeaves - punorm) - 1.0 / (numLeaves - plnorm + 1));
+    //cout << "numLeaves = " << numLeaves << ", punorm = " <<  punorm << ", plnorm = " << plnorm << ", res = " << res << endl;
+    // here we assume the distribution of time is according to exponential distribution of mean 2.0/k(k+1) waiting time
+    return res;
+}
+
+void MarginalTree ::SetParent(int child, int par, bool fAdjLen)
+{ // make par the parent of child; only the upper bound of both indices is asserted
+    YW_ASSERT_INFO(child < GetTotNodesNum() && par < GetTotNodesNum(), "Wrong here");
+    listParentNodePos[child] = par;
+    // when fAdjLen is set, also reset the child's edge length to the default for its new parent
+    if (fAdjLen == true)
+    {
+        listEdgeDist[child] = GetDefaultEdgeLen(child);
+    }
+}
+
+void MarginalTree ::SwapBranches(int nodeBranch1, int nodeBranch2)
+{
+    // exchange the parents of the two nodes; both parents are read before either one is changed
+    // swap two branches ending at the two nodes passed in; here assume the branch length will not change
+    // note: descendant lists are not updated here, callers may need to rebuild them after this
+    int p1 = GetParent(nodeBranch1);
+    int p2 = GetParent(nodeBranch2);
+    SetParent(nodeBranch1, p2, false);
+    SetParent(nodeBranch2, p1, false);
+}
+
+int MarginalTree ::CalcNormHeight(int node)
+{ // node index minus (number of leaves - 1), floored at 0: every index below the leaf count maps to 0
+    int normHt = node - (GetNumLeaves() - 1);
+    if (normHt < 0)
+    {
+        normHt = 0;
+    }
+    return normHt;
+}
+
+void MarginalTree ::Binarize()
+{
+    // first initialize distance if not yet
+    if (listEdgeDist.size() == 0)
+    {
+        InitDefaultEdgeLen();
+    }
+
+    // assume distance has been set properly
+    YW_ASSERT_INFO(listEdgeDist.size() > 0, "Tree edge length not set");
+
+    // This function makes this marginal tree binary: a node with more than two children is split into a chain of new zero-length internal nodes, and a node with a single child is dropped
+    vector<int> updatedLabels, updatedPars;
+    vector<double> updatedDist;
+
+    // find out the current largest label, for the purpose of adding new labels
+    int maxLabel = -1;
+    for (int i = 0; i < (int)listNodeLabels.size(); ++i)
+    {
+        if (listNodeLabels[i] > maxLabel)
+        {
+            maxLabel = listNodeLabels[i];
+        }
+    }
+    int labelNextToUse = maxLabel + 1;
+
+    // before doing anything, get the descendent info for each tree node
+    vector<vector<int>> listDescendentsVec;
+    ConsDecedentInfo(listDescendentsVec);
+    //vector< set<int> > listDescendents;
+    //for( unsigned int i=0; i<listDescendents.size(); ++i )
+    //{
+    //    set<int> tmpSet;
+    //    PopulateSetByVec( tmpSet,listDescendentsVec[i]  );
+    //    listDescendents.push_back(tmpSet);
+    //}
+
+    // we need another auxiliary data structure to map old position to new position
+    // we need this because we are adding some new nodes between two old nodes
+    vector<int> mapOldPosToNewPos(GetTotNodesNum());
+
+    // first copy every thing up to the leaves
+    for (int i = 0; i < numLeaves; ++i)
+    {
+        updatedLabels.push_back(listNodeLabels[i]);
+        updatedPars.push_back(listParentNodePos[i]);
+        updatedDist.push_back(listEdgeDist[i]);
+
+        // leaf is never changed position
+        mapOldPosToNewPos[i] = i;
+    }
+    // now we treat each internal node one by one, and split it when needed
+    for (int i = numLeaves; i < GetTotNodesNum(); ++i)
+    {
+        // the first thing to do is: find children from the constructed portion of tree
+        vector<int> &listChildren = listDescendentsVec[i];
+        //cout << "IN node = " << i << ", children num = " << listChildren.size() << endl;
+
+        // do nothing if there are exactly 2 children (the node is already binary)
+        // it is possible that an internal node does not have any children
+        // Then what to do here? TBD
+        if (listChildren.size() == 2 || listChildren.size() == 0)
+        {
+            //cout << "Simply go over the originals...\n";
+            updatedLabels.push_back(listNodeLabels[i]);
+            updatedPars.push_back(listParentNodePos[i]); // do it for now, will update later
+            updatedDist.push_back(listEdgeDist[i]);
+
+            // record current position
+            mapOldPosToNewPos[i] = (int)updatedLabels.size() - 1;
+
+            // now update its children's parent to this new location
+            for (int jjj = 0; jjj < (int)listChildren.size(); ++jjj)
+            {
+                int oldpos = listChildren[jjj];
+                int newpos = mapOldPosToNewPos[oldpos];
+                updatedPars[newpos] = mapOldPosToNewPos[i];
+            }
+
+            continue;
+        }
+        if (listChildren.size() == 1)
+        {
+            // we should remove this node
+            int childOldPos = listChildren[0];
+            // skip this node, but update the node
+            // let its (only) child points to its parent
+            // NOTE: this edits the original parent list in place
+            listParentNodePos[childOldPos] = listParentNodePos[i];
+            //cout << "childOldPos's parent set to  = " << listParentNodePos[i] << endl;
+
+            // also update listChildren
+            if (listParentNodePos[i] >= 0)
+            {
+                int pppos = listParentNodePos[i];
+                vector<int> listNewChildAtIParent;
+                for (int ii = 0; ii < (int)listDescendentsVec[pppos].size(); ++ii)
+                {
+                    if (i != listDescendentsVec[pppos][ii])
+                    {
+                        // do not append i anymore
+                        listNewChildAtIParent.push_back(listDescendentsVec[pppos][ii]);
+                    }
+                }
+                //
+                YW_ASSERT_INFO((int)listNewChildAtIParent.size() == (int)listDescendentsVec[pppos].size() - 1,
+                               "Something wrong");
+                // append a new thing
+                listNewChildAtIParent.push_back(childOldPos);
+                // update the original list
+                listDescendentsVec[pppos] = listNewChildAtIParent;
+            }
+            else
+            {
+                int newpos = mapOldPosToNewPos[childOldPos];
+                updatedPars[newpos] = -1;
+                updatedDist[newpos] = 0.0;
+            }
+            continue;
+        }
+
+        // otherwise, we have to split the node
+        for (int jjj = 0; jjj < (int)listChildren.size() - 2; ++jjj)
+        {
+            updatedLabels.push_back(labelNextToUse++); // new IN is assigned an arbitrary label
+            updatedPars.push_back(-1);                 // do it for now, will update later
+            // for any new internal node, edge length (out of it) is 0
+            updatedDist.push_back(0.0);
+
+            // now update children
+            int curINPos = (int)updatedLabels.size() - 1;
+            if (jjj == 0)
+            {
+                // Then we use the first original child
+                int oldpos = listChildren[0];
+                int newpos = mapOldPosToNewPos[oldpos];
+                updatedPars[newpos] = curINPos;
+            }
+            else
+            {
+                // otherwise, we use the previous IN
+                updatedPars[curINPos - 1] = curINPos;
+            }
+            // the right branch is always an original branch
+            int oldpos = listChildren[jjj + 1];
+            int newpos = mapOldPosToNewPos[oldpos];
+            updatedPars[newpos] = curINPos;
+        }
+        // now we append the original internal node in
+        updatedLabels.push_back(listNodeLabels[i]);
+        updatedPars.push_back(listParentNodePos[i]); // do it for now, will update later
+        updatedDist.push_back(listEdgeDist[i]);
+
+        // record current position
+        mapOldPosToNewPos[i] = (int)updatedLabels.size() - 1;
+
+        // update its two children, one of them is the last new node to add
+        updatedPars[(int)updatedPars.size() - 2] = mapOldPosToNewPos[i];
+        int oldpos = listChildren[(int)listChildren.size() - 1];
+        int newpos = mapOldPosToNewPos[oldpos];
+        updatedPars[newpos] = mapOldPosToNewPos[i];
+    }
+    // finally, we update the mtree
+    this->listNodeLabels = updatedLabels;
+    this->listParentNodePos = updatedPars;
+    this->listEdgeDist = updatedDist;
+
+    // check that the three per-node lists have the same size
+    YW_ASSERT_INFO(this->listNodeLabels.size() == this->listParentNodePos.size(),
+                   "In binaralize: size wrong1");
+    YW_ASSERT_INFO(this->listNodeLabels.size() == this->listEdgeDist.size(),
+                   "In binaralize: size wrong1");
+    // the out-degree check below is compiled out
+#if 0
+    vector<int> nodeOutDegrees;
+    for(int i=0; i<(int)this->listNodeLabels.size(); ++i)
+    {
+        nodeOutDegrees.push_back( 0 );
+    }
+    for(int i=0; i<(int)this->listNodeLabels.size(); ++i)
+    {
+        int ppos = listParentNodePos[i] ;
+        YW_ASSERT_INFO( ppos < (int)listParentNodePos.size(), "pos wrong" );
+        if( ppos >= 0 )
+        {
+            nodeOutDegrees[ ppos ]++;
+            if( nodeOutDegrees[ ppos ] >= 3 )
+            {
+                YW_ASSERT_INFO( false, "Error in binarinize." );
+            }
+        }
+    }
+#endif
+    //Dump();
+}
+
+void MarginalTree ::Consolidate()
+{
+    // Remove internal nodes that have at most one child (degree-2 nodes),
+    // adding their edge lengths onto the edge of the surviving descendant.
+    // Leaves and the last node in the list (treated as the root) are always kept.
+    const int numNodesTot = GetTotNodesNum();
+    const int numLvs = GetNumLeaves();
+
+    // occurTimes[v]: how many distinct children of v were met while walking
+    // upwards from the leaves; each node is expanded at most once.
+    vector<int> occurTimes(numNodesTot, 0);
+    vector<bool> nodeVisitedFlags(numNodesTot, false);
+    stack<int> nodesToExplore;
+    for (int i = 0; i < numLvs; ++i)
+    {
+        nodesToExplore.push(i);
+    }
+    while (nodesToExplore.empty() == false)
+    {
+        int node = nodesToExplore.top();
+        nodesToExplore.pop();
+
+        // if this is already visited, skip
+        if (nodeVisitedFlags[node] == true)
+        {
+            continue;
+        }
+        nodeVisitedFlags[node] = true;
+        int pp = GetParent(node);
+        if (pp >= 0)
+        {
+            nodesToExplore.push(pp);
+            occurTimes[pp]++;
+        }
+    }
+
+    // listNumDelItems[i]: number of deleted nodes at positions <= i, i.e. how
+    // far position i shifts down in the compacted lists.
+    vector<int> listNumDelItems(numLvs, 0);
+    int numToDelete = 0;
+    for (int i = numLvs; i < numNodesTot; ++i)
+    {
+        if (occurTimes[i] <= 1 && i != numNodesTot - 1)
+        {
+            numToDelete++;
+        }
+        listNumDelItems.push_back(numToDelete);
+    }
+
+    // now build the compacted lists
+    vector<int> listNodeLabelsNew;
+    vector<int> listParentNodePosNew;
+    vector<double> listEdgeDistNew;
+    for (int i = 0; i < numNodesTot; ++i)
+    {
+        // leaves and the root are always kept
+        if (occurTimes[i] > 1 || i < numLvs || i == numNodesTot - 1)
+        {
+            listNodeLabelsNew.push_back(listNodeLabels[i]);
+
+            // trace upwards until we reach an ancestor that is kept
+            // (occur time > 1) or the top of the tree
+            double distNew = listEdgeDist[i];
+            int parNew = GetParent(i);
+            // test parNew >= 0 FIRST: the root has parent -1 and must not be
+            // used to index occurTimes
+            while (parNew >= 0 && occurTimes[parNew] <= 1)
+            {
+                int parNext = GetParent(parNew);
+                if (parNext < 0)
+                {
+                    break;
+                }
+                distNew += listEdgeDist[parNew];
+                parNew = parNext;
+            }
+
+            // shift the parent position by the number of deleted nodes before
+            // it; a node without parent keeps -1 (and must not index the list)
+            int parToSet = -1;
+            if (parNew >= 0)
+            {
+                parToSet = parNew - listNumDelItems[parNew];
+                if (parToSet < 0)
+                {
+                    parToSet = -1;
+                }
+            }
+            listParentNodePosNew.push_back(parToSet);
+            listEdgeDistNew.push_back(distNew);
+        }
+    }
+
+    // finally store this
+    listNodeLabels = listNodeLabelsNew;
+    listParentNodePos = listParentNodePosNew;
+    listEdgeDist = listEdgeDistNew;
+}
+
+double MarginalTree ::GetEdgeLen(int childNodeIndex) const
+{
+    // Return the length stored for the edge above childNodeIndex.
+    // Both bounds are checked: a negative index (e.g. a "no parent" marker of
+    // -1) would otherwise read before the start of the list.
+    YW_ASSERT_INFO(childNodeIndex >= 0 && childNodeIndex < (int)listEdgeDist.size(), "List overflow");
+    return listEdgeDist[childNodeIndex];
+}
+
+double MarginalTree ::GetTotEdgeLen() const
+{
+    // Sum of the edge lengths of all nodes except the root
+    double lenSum = 0.0;
+    const int nodeCount = GetTotNodesNum();
+    for (int pos = 0; pos < nodeCount; ++pos)
+    {
+        if (pos == GetRoot())
+        {
+            // the root has no edge above it that should be counted
+            continue;
+        }
+        lenSum += GetEdgeLen(pos);
+    }
+    return lenSum;
+}
+
+void MarginalTree ::ConsDecedentInfo(vector<vector<int>> &descNodes) const
+{
+    // Build, for every node position, the list of its direct children
+    // (in increasing position order). Nodes without parent (< 0) are not
+    // listed anywhere.
+    const int nodeCount = GetTotNodesNum();
+    descNodes.assign(nodeCount, vector<int>());
+    for (int cur = 0; cur < nodeCount; ++cur)
+    {
+        const int parent = listParentNodePos[cur];
+        if (parent < 0)
+        {
+            continue;
+        }
+        descNodes[parent].push_back(cur);
+    }
+}
+
+void MarginalTree ::ConsAllDecedentInfo(vector<set<int>> &descNodes, bool fIncSelf) const
+{
+    // For every node position, collect the positions of the nodes below it;
+    // when fIncSelf is set each node is also a member of its own set.
+    // Sets are pushed to the parent in one pass in position order, so this
+    // relies on children appearing before their parents in the node list.
+    // NOTE(review): UnionSets is assumed to merge its second argument into
+    // its first - confirm against its definition.
+    const int nodeCount = GetTotNodesNum();
+    descNodes.assign(nodeCount, set<int>());
+    for (int cur = 0; cur < nodeCount; ++cur)
+    {
+        if (fIncSelf)
+        {
+            descNodes[cur].insert(cur);
+        }
+
+        const int parent = listParentNodePos[cur];
+        if (parent < 0)
+        {
+            continue;
+        }
+        UnionSets(descNodes[parent], descNodes[cur]);
+        if (!fIncSelf)
+        {
+            // cur is not in its own set, so add it to the parent explicitly
+            descNodes[parent].insert(cur);
+        }
+    }
+}
+
+void MarginalTree ::ConsDecedentLeavesInfo(vector<set<int>> &descLaves) const
+{
+    // For every node position, collect the positions of the leaves below it
+    // (a leaf's set holds the leaf itself). Done in one pass in position
+    // order, so children are expected to precede their parents.
+    // NOTE(review): UnionSets is assumed to merge its second argument into
+    // its first - confirm against its definition.
+    const int nodeCount = GetTotNodesNum();
+    descLaves.assign(nodeCount, set<int>());
+    for (int cur = 0; cur < nodeCount; ++cur)
+    {
+        // positions below numLeaves are the leaves
+        if (cur < numLeaves)
+        {
+            descLaves[cur].insert(cur);
+        }
+
+        const int parent = listParentNodePos[cur];
+        if (parent < 0)
+        {
+            continue;
+        }
+        UnionSets(descLaves[parent], descLaves[cur]);
+    }
+}
+
+void MarginalTree ::ConsDecedentLeavesInfoLabels(vector<set<int>> &leafNodeLabels) const
+{
+    // Same as ConsDecedentLeavesInfo, but each leaf position is translated
+    // to its label via GetLabel.
+    vector<set<int>> leafPosPerNode;
+    ConsDecedentLeavesInfo(leafPosPerNode);
+
+    leafNodeLabels.clear();
+    for (int idx = 0; idx < (int)leafPosPerNode.size(); ++idx)
+    {
+        const set<int> &positions = leafPosPerNode[idx];
+        set<int> labels;
+        set<int>::const_iterator itPos = positions.begin();
+        while (itPos != positions.end())
+        {
+            labels.insert(GetLabel(*itPos));
+            ++itPos;
+        }
+        leafNodeLabels.push_back(labels);
+    }
+}
+
+void MarginalTree ::FindAllSplits(vector<set<int>> &listSplits) const
+{
+    // For every node position, collect the labels of the leaves below it
+    // (a leaf contributes its own label). One pass in position order, so
+    // children are expected to precede their parents.
+    // NOTE(review): UnionSets is assumed to merge its second argument into
+    // its first - confirm against its definition.
+    const int nodeCount = GetTotNodesNum();
+    listSplits.assign(nodeCount, set<int>());
+    for (int cur = 0; cur < nodeCount; ++cur)
+    {
+        // positions below numLeaves are the leaves
+        if (cur < numLeaves)
+        {
+            listSplits[cur].insert(GetLabel(cur));
+        }
+
+        const int parent = listParentNodePos[cur];
+        if (parent < 0)
+        {
+            continue;
+        }
+        UnionSets(listSplits[parent], listSplits[cur]);
+    }
+}
+
+int MarginalTree ::GetParent(int child) const
+{
+    // Return the position of the parent of node child, as stored in
+    // listParentNodePos (callers in this file treat a negative value as
+    // "no parent").
+    // Both bounds are checked: the upward-walking callers can feed a previous
+    // result of -1 back in, which must not be used as an index.
+    const bool fInRange = (child >= 0 && child < GetTotNodesNum());
+    if (fInRange == false)
+    {
+        cout << "child = " << child << ", tot num of nodes = " << GetTotNodesNum() << endl;
+    }
+    YW_ASSERT_INFO(fInRange, "Range bug");
+    return listParentNodePos[child];
+}
+
+void MarginalTree ::ConsHeightsInfo(vector<int> &nodesHt) const
+{
+    // Compute for every node its height counted in edges (0 for nodes with
+    // no children). One pass in position order: each node raises its
+    // parent's height to its own height + 1 when that is larger.
+    const int nodeCount = GetTotNodesNum();
+    nodesHt.assign(nodeCount, 0);
+    for (int cur = 0; cur < nodeCount; ++cur)
+    {
+        const int parent = listParentNodePos[cur];
+        if (parent < 0)
+        {
+            continue;
+        }
+        const int htViaCur = nodesHt[cur] + 1;
+        if (nodesHt[parent] < htViaCur)
+        {
+            nodesHt[parent] = htViaCur;
+        }
+    }
+}
+
+void MarginalTree ::Dump() const
+{
+    // Print the tree state to stdout: leaf count, then the label, parent
+    // and edge-length lists.
+    cout << "Tree: number of leaves: " << numLeaves << endl;
+
+    cout << "Node list = ";
+    DumpIntVec(listNodeLabels);
+
+    cout << "Parent list = ";
+    DumpIntVec(listParentNodePos);
+
+    cout << "Tree dist = ";
+    DumpDoubleVec(listEdgeDist);
+}
+
+int MarginalTree ::GetPosForLabel(int lbl) const
+{
+    // Linear search: position of the first node carrying label lbl,
+    // or -1 when no node has it.
+    const int labelCount = (int)listNodeLabels.size();
+    for (int pos = 0; pos < labelCount; ++pos)
+    {
+        if (listNodeLabels[pos] == lbl)
+        {
+            return pos;
+        }
+    }
+    return -1;
+}
+
+int MarginalTree ::GetMRCA(int v1, int v2) const
+{
+    // Find the most recent common ancestor of v1 and v2 by repeatedly
+    // replacing the smaller of the two positions by its parent until both
+    // positions coincide.
+    int posA = v1;
+    int posB = v2;
+    while (posA != posB)
+    {
+        // always lift the one with the smaller position
+        int &lower = (posA < posB) ? posA : posB;
+        lower = GetParent(lower);
+    }
+    return posA;
+}
+
+void MarginalTree ::GetChildren(int node, set<int> &listChildren) const
+{
+    // Collect every position whose parent entry equals node
+    // (full scan of the parent list).
+    listChildren.clear();
+    const int parCount = (int)listParentNodePos.size();
+    for (int pos = 0; pos < parCount; ++pos)
+    {
+        if (listParentNodePos[pos] != node)
+        {
+            continue;
+        }
+        listChildren.insert(pos);
+    }
+}
+
+int MarginalTree ::GetMaxHt() const
+{
+    // Largest node height (in edges) as computed by ConsHeightsInfo;
+    // 0 when there are no nodes.
+    vector<int> nodeHeights;
+    ConsHeightsInfo(nodeHeights);
+
+    int best = 0;
+    for (vector<int>::const_iterator it = nodeHeights.begin(); it != nodeHeights.end(); ++it)
+    {
+        if (*it > best)
+        {
+            best = *it;
+        }
+    }
+    return best;
+}
+
+double MarginalTree ::GetHeight() const
+{
+    // Height of the whole tree = height (in branch length) of the root
+    return GetHeightOfNode(GetRoot());
+}
+double MarginalTree ::GetHeightOfNode(int node) const
+{
+    // Height of node in branch length: the longer of the two downward paths
+    // through its left and right descendant; 0 if either is missing.
+    const int leftPos = GetLeftDescendant(node);
+    const int rightPos = GetRightDescendant(node);
+    if (leftPos < 0 || rightPos < 0)
+    {
+        return 0.0;
+    }
+    const double htViaLeft = GetEdgeLen(leftPos) + GetHeightOfNode(leftPos);
+    const double htViaRight = GetEdgeLen(rightPos) + GetHeightOfNode(rightPos);
+    return (htViaLeft < htViaRight) ? htViaRight : htViaLeft;
+}
+
+void MarginalTree ::RemoveLeafNodeFromBinaryTree(int lfn)
+{
+    // Remove leaf lfn together with its parent pp (which would otherwise
+    // become a degree-2 node), compacting the label and parent lists.
+    // NOTE(review): only listNodeLabels, listParentNodePos and numLeaves are
+    // updated here; listEdgeDist is not rebuilt by this function, so it keeps
+    // its old size - confirm callers do not rely on edge lengths afterwards.
+    YW_ASSERT_INFO(IsLeaf(lfn) == true, "Not a leaf");
+    // remove a leaf node (and suppress the resulting degree-2 node)
+    // rebuild the lists without positions lfn and pp
+    vector<int> listNodeLabelsNew;
+    vector<int> listParentNodePosNew;
+    int pp = GetParent(lfn);
+    for (int i = 0; i < GetTotNodesNum(); ++i)
+    {
+        if (i != lfn && i != pp)
+        {
+            listNodeLabelsNew.push_back(this->listNodeLabels[i]);
+
+            // re-map the parent position of node i to the compacted numbering
+            int parNew;
+            int oldPar = GetParent(i);
+            if (oldPar < pp)
+            {
+                // parent sits below pp: shift by one
+                // (presumably only lfn was removed before it - assumes lfn < oldPar)
+                parNew = oldPar - 1;
+            }
+            else if (oldPar > pp)
+            {
+                // parent sits above pp: both lfn and pp were removed before it
+                parNew = oldPar - 2;
+            }
+            else
+            {
+                // node i is the sibling of lfn: pp is removed, so re-attach i to pp's parent
+                parNew = GetParent(pp) - 2;
+            }
+            // any negative result (e.g. for a node without parent) is stored as -1
+            if (parNew < 0)
+            {
+                parNew = -1;
+            }
+            listParentNodePosNew.push_back(parNew);
+        }
+    }
+    // store the compacted lists
+    this->listNodeLabels = listNodeLabelsNew;
+    this->listParentNodePos = listParentNodePosNew;
+
+    this->numLeaves--;
+}
+
+bool MarginalTree ::AreTwoPathsDisjoint(int sn1, int en1, int sn2, int en2) const
+{
+    // Test whether the tree paths (sn1, en1) and (sn2, en2) share no node.
+    // Each path is walked by lifting the smaller of its two end positions to
+    // its parent until both ends meet.
+    set<int> firstPathNodes;
+
+    // pass 1: record every node on the first path
+    int posA = sn1;
+    int posB = en1;
+    firstPathNodes.insert(posA);
+    firstPathNodes.insert(posB);
+    while (posA != posB)
+    {
+        int &lower = (posA < posB) ? posA : posB;
+        lower = GetParent(lower);
+        firstPathNodes.insert(lower);
+    }
+
+    // pass 2: walk the second path, stop at the first shared node
+    posA = sn2;
+    posB = en2;
+    if (firstPathNodes.count(posA) > 0 || firstPathNodes.count(posB) > 0)
+    {
+        return false;
+    }
+    while (posA != posB)
+    {
+        int &lower = (posA < posB) ? posA : posB;
+        lower = GetParent(lower);
+        if (firstPathNodes.count(lower) > 0)
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+int MarginalTree ::GetPath(int sn, int en, set<int> &edgesOnPath) const
+{
+    // Add to edgesOnPath the nodes on the path between sn and en (each node
+    // stands for the edge above it), excluding their MRCA, and return the
+    // MRCA. Note: edgesOnPath is NOT cleared first; entries are only added
+    // (and the MRCA erased).
+    int posA = sn;
+    int posB = en;
+    edgesOnPath.insert(posA);
+    edgesOnPath.insert(posB);
+    while (posA != posB)
+    {
+        // lift the end with the smaller position
+        int &lower = (posA < posB) ? posA : posB;
+        lower = GetParent(lower);
+        edgesOnPath.insert(lower);
+    }
+
+    // both ends now sit on the MRCA; take it out of the result
+    const int mrca = posA;
+    YW_ASSERT_INFO(edgesOnPath.count(mrca) > 0, "wrong2");
+    edgesOnPath.erase(mrca);
+
+    return mrca;
+}
+
+double MarginalTree ::GetPathLen(int sn, int en)
+{
+    // Total branch length along the tree path between sn and en
+    set<int> pathEdges;
+    const int mrca = GetPath(sn, en, pathEdges);
+    YW_ASSERT_INFO(pathEdges.count(mrca) == 0, "Fail to find");
+
+    double lenSum = 0.0;
+    set<int>::iterator itEdge = pathEdges.begin();
+    while (itEdge != pathEdges.end())
+    {
+        lenSum += GetEdgeLen(*itEdge);
+        ++itEdge;
+    }
+    return lenSum;
+}
+
+void MarginalTree ::OutputGML(const char *fileName) const
+{
+    // Write the tree to fileName as a directed graph in GML format:
+    // one node entry per tree node (id = position in the node list, label
+    // "n<id>") and one edge entry from each node's parent to the node.
+    string name = fileName;
+
+    DEBUG("FileName=");
+    DEBUG(name);
+    DEBUG("\n");
+    // Now open file to write out
+    ofstream outFile(name.c_str());
+
+    // First output some header info
+    outFile << "graph [\n";
+    outFile << "comment ";
+    OutputQuotedString(outFile, "Automatically generated by Graphing tool");
+    outFile << "\ndirected  1\n";
+    outFile << "id  1\n";
+    outFile << "label ";
+    OutputQuotedString(outFile, "Marginal Tree....\n");
+
+    // Now output all the vertices
+    for (int i = 0; i < (int)listNodeLabels.size(); ++i)
+    {
+        outFile << "node [\n";
+
+        outFile << "id " << i << endl;
+        outFile << "label ";
+        // bounded formatting: never write past the end of buf
+        char buf[80];
+        snprintf(buf, sizeof(buf), "n%d", i);
+
+        OutputQuotedString(outFile, buf);
+        outFile << endl;
+
+        // See if we need special shape here
+        outFile << "defaultAtrribute   1\n";
+
+        outFile << "]\n";
+    }
+
+    // Now output all the edges: one per (parent, child) entry of the parent list
+    for (int i = 0; i < (int)listParentNodePos.size(); ++i)
+    {
+        int parpos = listParentNodePos[i];
+        if (parpos < 0)
+        {
+            // a node without parent (the root) has no incoming edge; emitting
+            // "source -1" would reference a node id that was never declared
+            continue;
+        }
+
+        outFile << "edge [\n";
+        outFile << "source " << parpos << endl;
+        outFile << "target  " << i << endl;
+        outFile << "label ";
+        OutputQuotedString(outFile, "");
+        outFile << "\n";
+        outFile << "]\n";
+    }
+
+    // Finally close the graph section and the file
+    outFile << "\n]\n";
+    outFile.close();
+}
+
+string MarginalTree ::GetNewick() const
+{
+    // Newick string of the whole tree: the Newick string taken at the last
+    // node in the list, which is used as the root here.
+    const int rootPos = GetTotNodesNum() - 1;
+    return GetNewickAt(rootPos);
+}
+string MarginalTree ::GetNewickSorted(bool fLen) const
+{
+    // Newick string of the whole tree with the two children of every node
+    // emitted in sorted order; fLen selects whether lengths are written.
+    const int rootPos = GetTotNodesNum() - 1;
+    const bool fSorted = true;
+    return GetNewickAt(rootPos, fSorted, fLen);
+}
+
+string MarginalTree ::GetNewickAt(int node, bool fSort, bool fLen) const
+{
+    // Return the Newick string of the subtree rooted at node.
+    //   fSort: emit the lexicographically smaller child string first
+    //   fLen:  append ":<edge length>" to leaves and to every internal node
+    //          except the last node in the list (the root)
+    // Numbers are formatted with std::to_string, which produces the same text
+    // as printf "%d" / "%f" but cannot overflow a fixed-size buffer (a "%f"
+    // rendering of a very large length needs hundreds of characters).
+    string res;
+    int childLeft = GetLeftDescendant(node);
+    int childRight = GetRightDescendant(node);
+    if (childLeft < 0)
+    {
+        // must be leaf
+        YW_ASSERT_INFO(IsLeaf(node) == true, "Wrong node in MT");
+        // for leaf, only output its label, optionally with its length
+        res = std::to_string(GetLabel(node));
+        if (fLen == true)
+        {
+            res += ":";
+            res += std::to_string(GetEdgeLen(node));
+        }
+    }
+    else
+    {
+        // internal node: needs both children
+        if (childRight < 0)
+        {
+            // print the tree state before the assertion below fires
+            Dump();
+        }
+        YW_ASSERT_INFO(childRight >= 0, "Left/right mismatch");
+        string strPart1 = GetNewickAt(childLeft, fSort, fLen);
+        string strPart2 = GetNewickAt(childRight, fSort, fLen);
+        const bool fKeepOrder = (fSort == false || strPart1 <= strPart2);
+        res = "(";
+        res += fKeepOrder ? strPart1 : strPart2;
+        res += ",";
+        res += fKeepOrder ? strPart2 : strPart1;
+        res += ")";
+        if (fLen == true && node < GetTotNodesNum() - 1)
+        {
+            res += ":";
+            res += std::to_string(GetEdgeLen(node));
+        }
+    }
+    return res;
+}
+
+void MarginalTree ::GetLeavesUnder(int nn, set<int> &leavesUnder) const
+{
+    // Add to leavesUnder the positions of all leaves in the subtree rooted at
+    // nn (nn itself when it is a leaf). leavesUnder is not cleared here, so
+    // results accumulate across the recursive calls.
+    if (IsLeaf(nn) == true)
+    {
+        leavesUnder.insert(nn);
+    }
+    else
+    {
+        // recurse into every child (GetChildren scans the parent list)
+        set<int> listChildren;
+        GetChildren(nn, listChildren);
+        for (set<int>::iterator it = listChildren.begin(); it != listChildren.end(); ++it)
+        {
+            GetLeavesUnder(*it, leavesUnder);
+        }
     }
-  }
 }
 
-// construct a marginal tree from nodes and parent info
-// NOTE: this function does not take distance. Therefore, we arbitarily assign
-// nodes to their respective heights and thus also assign branch length ALSO
-// NOTE: when we assign branch length, the branch length are set uniformly
-// distributed within [0-1].
-void InitMarginalTree(MarginalTree &mTree, int numLeaves,
-                      const vector<int> &listLabels,
-                      const vector<int> &listParentNodePos) {
-  // cout << "numLeaves = " << numLeaves << endl;
-  // cout << "InitMarginalTree: numLeaves = " << numLeaves << endl;
-  // cout << "listLabels = ";
-  // DumpIntVec(listLabels);
-  // cout << "listParentNodePos = ";
-  // DumpIntVec( listParentNodePos );
-  //
-  mTree.numLeaves = numLeaves;
-  mTree.listNodeLabels = listLabels;
-  mTree.listParentNodePos = listParentNodePos;
-
-  // now init edge dist
-  mTree.listEdgeDist.clear();
-  int numNonLeafNodes = listLabels.size() - numLeaves;
-  double unitLen = 1.0 / numNonLeafNodes;
-  for (int i = 0; i < (int)listLabels.size() - 1; ++i) {
-    int parPos = listParentNodePos[i] - numLeaves + 1;
-    // cout << "par = " << listParentNodePos[i]  << " for node i = " << i <<
-    // endl; cout << "normalized par pos = " << parPos << endl;
-    YW_ASSERT_INFO(parPos > 0, "Fatal error in InitMarginalTree");
-    if (i < numLeaves) {
-      // leaf
-      mTree.listEdgeDist.push_back(parPos * unitLen);
-    } else {
-      // need to subtract current pos
-      int curpos = i - numLeaves + 1;
-      // cout << "curpos = " << curpos << endl;
-      YW_ASSERT_INFO(curpos < parPos, "Trouble in InitMarginalTree");
-      mTree.listEdgeDist.push_back((parPos - curpos) * unitLen);
-    }
-  }
-  // the root has length-0 by default
-  mTree.listEdgeDist.push_back(0.0);
-  // also build up descendents
-  mTree.BuildDescendantInfo();
-}
-
-// find the neighborhood of marginal trees within one NNI operation away (incl.
-// the current tree)
-void FindOneNNIMTreesFrom(MarginalTree &mTreeSrc,
-                          vector<MarginalTree> &listNNITrees,
-                          vector<pair<int, int> > *pListPairEdgesSwapped) {
-  //
-  listNNITrees.clear();
-
-  // process each internal node (w/ at least three leaves below) of the mtree,
-  // and
-  for (int node = mTreeSrc.GetNumLeaves(); node < mTreeSrc.GetTotNodesNum();
-       ++node) {
-    //
-    int nodeLeft = mTreeSrc.GetLeftDescendant(node);
-    int nodeRight = mTreeSrc.GetRightDescendant(node);
-    if (mTreeSrc.IsLeaf(nodeLeft) == true &&
-        mTreeSrc.IsLeaf(nodeRight) == true) {
-      // skip if both children are leaves since in this case swapping has no
-      // effect
-      continue;
-    }
-    // now swap its two children's subtree in up to four ways
-    int nodesProc1[2], nodesProc2[2];
-    nodesProc1[0] = nodeLeft;
-    nodesProc1[1] = nodeRight;
-    nodesProc2[1] = nodeLeft;
-    nodesProc2[0] = nodeRight;
-    for (int ii = 0; ii < 2; ++ii) {
-      int n1Proc = nodesProc1[ii];
-      int n2Proc = nodesProc2[ii];
-      if (mTreeSrc.IsLeaf(n1Proc) == false) {
-        int node1Left = mTreeSrc.GetLeftDescendant(n1Proc);
-        int node1Right = mTreeSrc.GetRightDescendant(n1Proc);
-        YW_ASSERT_INFO(node1Left >= 0 && node1Right >= 0, "Can not miss");
-
-        // two choices to swap: n2Proc with one of the descendents
-        int nodesProc1Child[2];
-        nodesProc1Child[0] = node1Left;
-        nodesProc1Child[1] = node1Right;
-        for (int jj = 0; jj < 2; ++jj) {
-          MarginalTree mtreeNNI1 = mTreeSrc;
-          mtreeNNI1.SwapBranches(nodesProc1Child[jj], n2Proc);
-          mtreeNNI1.BuildDescendantInfo();
-          // cout << "After swap: \n";
-          // mtreeNNI1.Dump();
-          mtreeNNI1.RearrangeParIncOrder();
-          // cout << "Found a new mtreeNNI1: " << mtreeNNI1.GetNewick() << endl;
-          // mtreeNNI1.Dump();
-          mtreeNNI1.BuildDescendantInfo();
-          // sort by leaf id: YW: Feb 19,2016
-          mtreeNNI1.SortByLeafId();
-          mtreeNNI1.BuildDescendantInfo();
-          listNNITrees.push_back(mtreeNNI1);
-
-          if (pListPairEdgesSwapped != NULL) {
-            pair<int, int> pp(nodesProc1Child[jj], n2Proc);
-            pListPairEdgesSwapped->push_back(pp);
-          }
-          // cout << "After descendent rebult, " << mtreeNNI1.GetNewick() <<
-          // endl; mtreeNNI1.Dump();
-        }
-      }
-    }
-  }
-  // finally add self
-  listNNITrees.push_back(mTreeSrc);
-  // exit(1);
-}
-
-void CreateSubtreeFromLeaves(MarginalTree &mTreeOrig,
-                             const set<int> &setLeafLabels,
-                             MarginalTree &mTreeSub,
-                             map<int, int> &mapNewNodeToOldNode) {
-  // cout << "Original tree: " << mTreeOrig.GetNewick() << ": set of leaves to
-  // process: "; DumpIntSet( setLeafLabels );
-
-  // find a subset of trees with the desired leaves (as matching the given
-  // labels) mapNewNodeToOldNode: new node index ==> old node index
-  map<pair<int, set<int> >, int> mapShrunkLeavesWithNum;
-
-  // get all the clades
-  for (int i = 0; i < mTreeOrig.GetTotNodesNum(); ++i) {
-    //
-    set<int> setGetDesc;
-    mTreeOrig.GetLeavesUnder(i, setGetDesc);
-    set<int> setGetDescLbls;
-    for (set<int>::iterator it = setGetDesc.begin(); it != setGetDesc.end();
-         ++it) {
-      int lbl = mTreeOrig.GetLabel(*it);
-      setGetDescLbls.insert(lbl);
-    }
-    set<int> sIntsect;
-    JoinSets(setGetDescLbls, setLeafLabels, sIntsect);
-
-    // ignore empty nodes
-    if (sIntsect.size() <= 0) {
-      //
-      continue;
-    }
-
-    // save it
-    pair<int, set<int> > ss(sIntsect.size(), sIntsect);
-    if (mapShrunkLeavesWithNum.find(ss) == mapShrunkLeavesWithNum.end()) {
-      mapShrunkLeavesWithNum.insert(
-          map<pair<int, set<int> >, int>::value_type(ss, i));
-    } else {
-      // save the lower (smaller)
-      if (mapShrunkLeavesWithNum[ss] > i) {
-        mapShrunkLeavesWithNum[ss] = i;
-      }
-    }
-  }
-#if 0
-cout << "mapShrunkLeavesWithNum: ";
-for( map< pair<int,set<int> >, int > :: iterator it = mapShrunkLeavesWithNum.begin(); it != mapShrunkLeavesWithNum.end(); ++it )
+void MarginalTree ::GetlabelsFor(const set<int> &setPos, set<int> &setLbls) const
 {
-cout << "Size: " << it->first.first << ", orig. node = " << it->second << ", set of leaves: ";
-DumpIntSet( it->first.second);
-}
-#endif
-
-  // set up the old and new node position map
-  map<int, int> mapNewToOldPos, mapOldToNewPos;
-  set<int> setNewParsPosOld;
-  int index = 0;
-  for (map<pair<int, set<int> >, int>::iterator it =
-           mapShrunkLeavesWithNum.begin();
-       it != mapShrunkLeavesWithNum.end(); ++it, ++index) {
     //
-    mapNewToOldPos.insert(map<int, int>::value_type(index, it->second));
-    mapOldToNewPos.insert(map<int, int>::value_type(it->second, index));
-
-    setNewParsPosOld.insert(it->second);
-  }
-
-  // now init the tree: note edge labels are ignored!
-  mTreeSub.Clear();
-  mTreeSub.SetNumLeaves(setLeafLabels.size());
-  vector<int> listLbls;
-  PopulateVecBySet(listLbls, setLeafLabels);
-  for (int i = (int)setLeafLabels.size();
-       i < (int)mapShrunkLeavesWithNum.size(); ++i) {
-    // these are internal nodes
-    listLbls.push_back(-1);
-  }
-  mTreeSub.SetLabelList(listLbls);
-  vector<int> listParPos;
-  // now set up parent
-  for (int i = 0; i < (int)listLbls.size(); ++i) {
-    YW_ASSERT_INFO(mapNewToOldPos.find(i) != mapNewToOldPos.end(),
-                   "Fail to find2");
-    int posOrig = mapNewToOldPos[i];
-    int anc = mTreeOrig.GetFirstNonselfAnces(posOrig, setNewParsPosOld);
-    int posNewAnc = -1;
-    if (anc >= 0) {
-      YW_ASSERT_INFO(mapOldToNewPos.find(anc) != mapOldToNewPos.end(),
-                     "Fail to find3");
-      posNewAnc = mapOldToNewPos[anc];
-    }
-    listParPos.push_back(posNewAnc);
-  }
-  mTreeSub.SetParList(listParPos);
-
-  // create nodes mapping
-  mapNewNodeToOldNode = mapNewToOldPos;
-
-  //
-  mTreeSub.BuildDescendantInfo();
-
-  // YW: how do we assign branch length
-  UpdateBranchLenInSubtree(mTreeOrig, mapNewNodeToOldNode, mTreeSub);
-#if 0
-cout << "Constructed subtree: " << mTreeSub.GetNewick() << endl;
-mTreeSub.Dump();
-cout << "mapNewNodeToOldNode: ";
-for(map<int,int> :: iterator it=mapNewNodeToOldNode.begin(); it != mapNewNodeToOldNode.end(); ++it)
-{
-cout << "[" << it->first << "," << it->second << "]  ";
-}
-cout << endl;
-#endif
+    setLbls.clear();
+    for (set<int>::const_iterator it = setPos.begin(); it != setPos.end(); ++it)
+    {
+        setLbls.insert(GetLabel(*it));
+    }
 }
 
-void UpdateBranchLenInSubtree(MarginalTree &mTreeOrig,
-                              map<int, int> &mapNewNodeToOldNode,
-                              MarginalTree &mTreeSub) {
-  // inverse map
-  // map<int,int> mapOldNodeToNewNode;
-  // for( map<int,int> :: iterator it = mapNewNodeToOldNode.begin(); it !=
-  // mapNewNodeToOldNode.end(); ++it  )
-  //{
-  //    //
-  //    YW_ASSERT_INFO( mapOldNodeToNewNode.find(it->second) ==
-  //    mapOldNodeToNewNode.end(), "Wrong" ); mapOldNodeToNewNode.insert(
-  //    map<int,int> :: value_type(it->second, it->first) );
-  //}
-
-  //
-  vector<double> listBrLens;
-  for (map<int, int>::iterator it = mapNewNodeToOldNode.begin();
-       it != mapNewNodeToOldNode.end(); ++it) {
-    double distcur = 0.0;
-    //
-    int pnew = it->first;
-    int pold = it->second;
-    int pnewpar = mTreeSub.GetParent(pnew);
-    if (pnewpar >= 0) {
-      YW_ASSERT_INFO(mapNewNodeToOldNode.find(pnewpar) !=
-                         mapNewNodeToOldNode.end(),
-                     "Fail to find");
-      int poldpar = mapNewNodeToOldNode[pnewpar];
-      distcur = mTreeOrig.GetPathLen(pold, poldpar);
-    }
-
-    listBrLens.push_back(distcur);
-  }
-  mTreeSub.SetBranchLenList(listBrLens);
-}
-
-void FindMatchedSubtrees(MarginalTree &mtreeNew, MarginalTree &mtreeRef,
-                         map<int, int> &mapSTNewToRef) {
-  // find the shared subtrees that are in both trees, then create a map: map the
-  // subtree index in mtreeNew to mtreeRef find all branches (subtrees below
-  // them) that are not in the reference tree setDiffBrs: in this tree but not
-  // in reference tree setDiffRefMissed: in reference tree but not in this tree
-  vector<set<int> > listSubtreesNew, listSubtreesRef;
-  mtreeNew.ConsDecedentLeavesInfoLabels(listSubtreesNew);
-  mtreeRef.ConsDecedentLeavesInfoLabels(listSubtreesRef);
-
-  // create fast searching
-  map<set<int>, int> mapIndexSTRef;
-  for (int i = 0; i < (int)listSubtreesRef.size(); ++i) {
-    mapIndexSTRef.insert(map<set<int>, int>::value_type(listSubtreesRef[i], i));
-  }
-
-  //
-  mapSTNewToRef.clear();
-  for (int i = 0; i < (int)listSubtreesNew.size(); ++i) {
-    if (mapIndexSTRef.find(listSubtreesNew[i]) == mapIndexSTRef.end()) {
-      mapSTNewToRef.insert(
-          map<int, int>::value_type(i, mapIndexSTRef[listSubtreesNew[i]]));
-    }
-  }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// Define a utility class
+void MarginalTree ::GetLeafSetsForCuts(const vector<int> &listCuts, vector<set<int>> &listLeafSets) const
+{
+    // this function finds the cutted subtrees' leaf sets for the given set of cut edges
+    listLeafSets.clear();
 
-MarginalTree ::MarginalTree() : numLeaves(0) {
-  // here, we initiailize distance
-  // TBD
-}
-
-void MarginalTree ::Clear() {
-  numLeaves = 0;
-  listNodeLabels.clear();
-  listParentNodePos.clear();
-  listEdgeDist.clear();
-  listLeftDescs.clear();
-  listRightDescs.clear();
-}
-
-void MarginalTree ::BuildDescendantInfo() {
-  // Note, this only works for binary tree!!!!!
-  listLeftDescs.clear();
-  listRightDescs.clear();
-  int numNodes = GetTotNodesNum();
-  // cout << "BuildDescendantInfo: numNodes: " << numNodes << endl;
-  listLeftDescs.resize(numNodes);
-  listRightDescs.resize(numNodes);
-
-  // for leaves, there is no children
-  for (int i = 0; i < numNodes; ++i) {
-    listLeftDescs[i] = -1;
-    listRightDescs[i] = -1;
-  }
-
-  // handle other cases
-  for (int i = 0; i < numNodes; ++i) {
-    int p = GetParent(i);
-    // cout << "paret of " << i << " is " << p << endl;
-    if (p < 0) {
-      continue;
-    }
-    // setup p's child to i
-    if (listLeftDescs[p] < 0) {
-      listLeftDescs[p] = i;
-      // cout << "Set left descendent of " << p << " to be " << i << endl;
-    } else {
-      if (listRightDescs[p] >= 0) {
-        cout << "Something wrong: the current tree:";
-        Dump();
-      }
-
-      // make sure this is binary tree
-      YW_ASSERT_INFO(listRightDescs[p] < 0, "Not a binary tree2");
-      listRightDescs[p] = i;
-      // cout << "Set right descendent of " << p << " to be " << i << endl;
-    }
-  }
-}
-
-bool MarginalTree ::IsToplogicSame(const MarginalTree &tree) const {
-  // this function test whether two things are topologically the same
-  if (GetTotNodesNum() != tree.GetTotNodesNum()) {
-    // cout << "Tree node numbers are not equal\n";
-    // nodes number are different, then different
-    return false;
-  }
-  //
-  if (GetNumLeaves() != tree.GetNumLeaves()) {
-    // cout << "Tree leaf numbers are not equal\n";
-    return false;
-  }
-  // make sure node id the same
-  // if( listNodeLabels != tree.listNodeLabels )
-  //{
-// cout << "Tree node ids are not equal\n";
-//        return false;
-//    }
-#if 0
-    if( listParentNodePos != tree.listParentNodePos  )
+    // we first create a map of whether an edge mutate or not
+    vector<bool> mapEdgeMutFlags;
+    for (int i = 0; i < this->GetTotNodesNum(); ++i)
     {
-//cout << "Tree node parents are not equal\n";
-        return false;
+        mapEdgeMutFlags.push_back(false);
+    }
+    for (int i = 0; i < (int)listCuts.size(); ++i)
+    {
+        mapEdgeMutFlags[listCuts[i]] = true;
     }
-#endif
 
-  // sort the leaves
-  // MarginalTree t1 = *this;
-  // MarginalTree t2 = tree;
-  // t1.SortByLeafId();
-  // t2.SortByLeafId();
-  vector<set<int> > t1splits, t2splits;
-  ConsDecedentLeavesInfo(t1splits);
-  tree.ConsDecedentLeavesInfo(t2splits);
-  set<set<int> > st1splits, st2splits;
-  for (int i = 0; i < (int)t1splits.size(); ++i) {
-    st1splits.insert(t1splits[i]);
-  }
-  for (int i = 0; i < (int)t2splits.size(); ++i) {
-    st2splits.insert(t2splits[i]);
-  }
-
-  if (st1splits != st2splits) {
-    // cout << "Tree node parents are not equal\n";
-    // cout << "** tree 1: \n";
-    // for(int i=0; i<(int)t1splits.size(); ++i )
-    //{
-    // DumpIntSet(t1splits[i]);
-    //}
-    // cout << "** tree 2: \n";
-    // for(int i=0; i<(int)t2splits.size(); ++i )
-    //{
-    // DumpIntSet(t2splits[i]);
-    //}
-    return false;
-  }
-
-  return true;
-}
-
-int MarginalTree ::GetLeftDescendant(int node) const {
-  YW_ASSERT_INFO((int)listLeftDescs.size() == GetTotNodesNum() &&
-                     (int)listRightDescs.size() == GetTotNodesNum(),
-                 "descendant info not set");
-  return listLeftDescs[node];
-}
-int MarginalTree ::GetRightDescendant(int node) const {
-  YW_ASSERT_INFO((int)listLeftDescs.size() == GetTotNodesNum() &&
-                     (int)listRightDescs.size() == GetTotNodesNum(),
-                 "descendant info not set");
-  return listRightDescs[node];
-}
-
-int MarginalTree ::GetFirstNonselfAnces(int v, const set<int> &setAnces) const {
-  // find the first non-self ancestor from the list; if not found return -1
-  int res = -1;
-
-  int ncv = v;
-  while (ncv >= 0) {
-    // get parent
-    ncv = GetParent(ncv);
-    if (setAnces.find(ncv) != setAnces.end()) {
-      res = ncv;
-      break;
-    }
-  }
-
-  return res;
-}
-
-void MarginalTree ::InitDefaultEdgeLen() {
-  listEdgeDist.clear();
-
-  // the default assume the following:
-  // (a) all leaves are on the same level
-  // (b) the rest of tree nodes are orgnized uniformly in distance
-  for (int i = 0; i < GetTotNodesNum() - 1; ++i) {
-    double distRel = GetDefaultEdgeLen(i);
-    listEdgeDist.push_back(distRel);
-  }
-  // the root has no edge here
-  listEdgeDist.push_back(0.0);
-}
-
-void MarginalTree ::InitUnitEdgelen() {
-  //
-  listEdgeDist.clear();
-
-  // the default assume the following:
-  // (a) all leaves are on the same level
-  // (b) the rest of tree nodes are orgnized uniformly in distance
-  for (int i = 0; i < GetTotNodesNum() - 1; ++i) {
-    listEdgeDist.push_back(1.0);
-  }
-  // the root has no edge here
-  listEdgeDist.push_back(0.0);
-}
-
-double MarginalTree ::GetDefaultEdgeLen(int child) {
-  int curpos = child;
-  int parpos = listParentNodePos[child];
-
-  int punorm = CalcNormHeight(parpos);
-  int plnorm = CalcNormHeight(curpos);
-  int numLeaves = GetNumLeaves();
-
-  if (punorm >= numLeaves) {
-    punorm = numLeaves - 1;
-  }
-  if (plnorm >= numLeaves) {
-    plnorm = numLeaves - 1;
-  }
-  // YW: changed back to old distance, 082306, to see if this matters
-  double res =
-      2.0 * (1.0 / (numLeaves - punorm) - 1.0 / (numLeaves - plnorm + 1));
-  // cout << "numLeaves = " << numLeaves << ", punorm = " <<  punorm << ",
-  // plnorm = " << plnorm << ", res = " << res << endl;
-  // here we assume the distrbution of time is according to exponential
-  // distibution of mean 2.0/k(k+1) waiting time
-  return res;
-}
-
-void MarginalTree ::SetParent(int child, int par, bool fAdjLen) {
-  YW_ASSERT_INFO(child < GetTotNodesNum() && par < GetTotNodesNum(),
-                 "Wrong here");
-  listParentNodePos[child] = par;
-  // also setup height
-  if (fAdjLen == true) {
-    listEdgeDist[child] = GetDefaultEdgeLen(child);
-  }
-}
-
-void MarginalTree ::SwapBranches(int nodeBranch1, int nodeBranch2) {
-  // cout << "Swapping nodes: " << nodeBranch1 << ", " << nodeBranch2 << endl;
-  // swap two branches ending at the two nodes passed in; here assume the branch
-  // length will not change note: may need to reset some other descendents' info
-  // after this
-  int p1 = GetParent(nodeBranch1);
-  int p2 = GetParent(nodeBranch2);
-  SetParent(nodeBranch1, p2, false);
-  SetParent(nodeBranch2, p1, false);
-}
-
-int MarginalTree ::CalcNormHeight(int node) {
-  int normHt = node - (GetNumLeaves() - 1);
-  if (normHt < 0) {
-    normHt = 0;
-  }
-  return normHt;
-}
-
-void MarginalTree ::Binarize() {
-  // first initialize distance if not yet
-  if (listEdgeDist.size() == 0) {
-    InitDefaultEdgeLen();
-  }
-
-  // assume distance has been set properly
-  YW_ASSERT_INFO(listEdgeDist.size() > 0, "Tree edge length not set");
-
-  // This function makes this marginal binary
-  vector<int> updatedLabels, updatedPars;
-  vector<double> updatedDist;
-
-  // find out the current largest label, for the purpose of adding new labels
-  int maxLabel = -1;
-  for (int i = 0; i < (int)listNodeLabels.size(); ++i) {
-    if (listNodeLabels[i] > maxLabel) {
-      maxLabel = listNodeLabels[i];
-    }
-  }
-  int labelNextToUse = maxLabel + 1;
-
-  // before doing anything, get the descendent info for each tree node
-  vector<vector<int> > listDescendentsVec;
-  ConsDecedentInfo(listDescendentsVec);
-  // vector< set<int> > listDescendents;
-  // for( unsigned int i=0; i<listDescendents.size(); ++i )
-  //{
-  //    set<int> tmpSet;
-  //    PopulateSetByVec( tmpSet,listDescendentsVec[i]  );
-  //    listDescendents.push_back(tmpSet);
-  //}
-
-  // we need another auxilary data structure to map old position to new position
-  // we need this because we are adding some new nodes between two old nodes
-  vector<int> mapOldPosToNewPos(GetTotNodesNum());
-
-  // first copy every thing up to the leaves
-  for (int i = 0; i < numLeaves; ++i) {
-    updatedLabels.push_back(listNodeLabels[i]);
-    updatedPars.push_back(listParentNodePos[i]);
-    updatedDist.push_back(listEdgeDist[i]);
-
-    // leaf is never changed position
-    mapOldPosToNewPos[i] = i;
-  }
-  // now we treat each internal node one by one, and split it when needed
-  for (int i = numLeaves; i < GetTotNodesNum(); ++i) {
-    // the first thing to do is: find children from the constructed portion of
-    // tree
-    vector<int> &listChildren = listDescendentsVec[i];
-    // cout << "IN node = " << i << ", children num = " << listChildren.size()
-    // << endl;
-
-    // do nothing if there is no mor than 2 children
-    // it is possible that an internal node does not have any children
-    // Then what to do here? TBD
-    if (listChildren.size() == 2 || listChildren.size() == 0) {
-      // cout << "Simply go over the originals...\n";
-      updatedLabels.push_back(listNodeLabels[i]);
-      updatedPars.push_back(listParentNodePos[i]); // do it for now, will update
-                                                   // later
-      updatedDist.push_back(listEdgeDist[i]);
-
-      // record current position
-      mapOldPosToNewPos[i] = (int)updatedLabels.size() - 1;
-
-      // now update its children's parent to this new location
-      for (int jjj = 0; jjj < (int)listChildren.size(); ++jjj) {
-        int oldpos = listChildren[jjj];
-        int newpos = mapOldPosToNewPos[oldpos];
-        updatedPars[newpos] = mapOldPosToNewPos[i];
-      }
-
-      continue;
-    }
-    if (listChildren.size() == 1) {
-      // we should remove this node
-      int childOldPos = listChildren[0];
-      // skip this node, but update the node
-      // let its (only) child points to its parent
-      // cout << "childOldPos = " << childOldPos << endl;
-      listParentNodePos[childOldPos] = listParentNodePos[i];
-      // cout << "childOldPos's parent set to  = " << listParentNodePos[i] <<
-      // endl;
-
-      // also update listChildren
-      if (listParentNodePos[i] >= 0) {
-        int pppos = listParentNodePos[i];
-        vector<int> listNewChildAtIParent;
-        for (int ii = 0; ii < (int)listDescendentsVec[pppos].size(); ++ii) {
-          if (i != listDescendentsVec[pppos][ii]) {
-            // do not append i anymore
-            listNewChildAtIParent.push_back(listDescendentsVec[pppos][ii]);
-          }
-        }
-        //
-        YW_ASSERT_INFO((int)listNewChildAtIParent.size() ==
-                           (int)listDescendentsVec[pppos].size() - 1,
-                       "Something wrong");
-        // append a new thing
-        listNewChildAtIParent.push_back(childOldPos);
-        // update the orginal list
-        listDescendentsVec[pppos] = listNewChildAtIParent;
-      } else {
-        int newpos = mapOldPosToNewPos[childOldPos];
-        updatedPars[newpos] = -1;
-        updatedDist[newpos] = 0.0;
-      }
-      continue;
-    }
-
-    // otherwise, we have to split the node
-    for (int jjj = 0; jjj < (int)listChildren.size() - 2; ++jjj) {
-      updatedLabels.push_back(labelNextToUse++); // new IN is assigned an
-                                                 // arbitary label
-      updatedPars.push_back(-1); // do it for now, will update later
-      // for any new internal node, edge length (out of it) is 0
-      updatedDist.push_back(0.0);
-
-      // now update children
-      int curINPos = (int)updatedLabels.size() - 1;
-      if (jjj == 0) {
-        // Then we use the first original child
-        int oldpos = listChildren[0];
-        int newpos = mapOldPosToNewPos[oldpos];
-        updatedPars[newpos] = curINPos;
-      } else {
-        // otherwise, we use the previous IN
-        updatedPars[curINPos - 1] = curINPos;
-      }
-      // the right branch is always an original branch
-      int oldpos = listChildren[jjj + 1];
-      int newpos = mapOldPosToNewPos[oldpos];
-      updatedPars[newpos] = curINPos;
-    }
-    // now we append the original internal node in
-    updatedLabels.push_back(listNodeLabels[i]);
-    updatedPars.push_back(listParentNodePos[i]); // do it for now, will update
-                                                 // later
-    updatedDist.push_back(listEdgeDist[i]);
-
-    // record current position
-    mapOldPosToNewPos[i] = (int)updatedLabels.size() - 1;
-
-    // update its two children, one of them is the last new node to add
-    updatedPars[(int)updatedPars.size() - 2] = mapOldPosToNewPos[i];
-    int oldpos = listChildren[(int)listChildren.size() - 1];
-    int newpos = mapOldPosToNewPos[oldpos];
-    updatedPars[newpos] = mapOldPosToNewPos[i];
-  }
-  // finally, we update the mtree
-  this->listNodeLabels = updatedLabels;
-  this->listParentNodePos = updatedPars;
-  this->listEdgeDist = updatedDist;
-
-  // check to make sure this is indeed binary
-  YW_ASSERT_INFO(this->listNodeLabels.size() == this->listParentNodePos.size(),
-                 "In binaralize: size wrong1");
-  YW_ASSERT_INFO(this->listNodeLabels.size() == this->listEdgeDist.size(),
-                 "In binaralize: size wrong1");
-  // now iterator the degree
-#if 0
-    vector<int> nodeOutDegrees;
-    for(int i=0; i<(int)this->listNodeLabels.size(); ++i)
+    // we start by bottom up way to traversal all nodes
+    vector<set<int>> nodesLeaves(this->GetTotNodesNum());
+    for (int i = 0; i < this->GetNumLeaves(); ++i)
     {
-        nodeOutDegrees.push_back( 0 );
+        // all leave nodes are trivial
+        nodesLeaves[i].insert(i);
     }
-    for(int i=0; i<(int)this->listNodeLabels.size(); ++i)
+    // test for all nodes
+    for (int i = 0; i < this->GetTotNodesNum(); ++i)
     {
-        int ppos = listParentNodePos[i] ;
-        YW_ASSERT_INFO( ppos < (int)listParentNodePos.size(), "pos wrong" );
-        if( ppos >= 0 )
+        // if the edge is cut, we have found an partition or it is a root
+        if (mapEdgeMutFlags[i] == true || i == this->GetTotNodesNum() - 1)
         {
-            nodeOutDegrees[ ppos ]++;
-            if( nodeOutDegrees[ ppos ] >= 3 )
+            if (nodesLeaves[i].size() > 0)
             {
-                YW_ASSERT_INFO( false, "Error in binarinize." );
+                //cout << "Found one partition: ";
+                //DumpIntSet( nodesLeaves[i] );
+                listLeafSets.push_back(nodesLeaves[i]);
             }
         }
+        else
+        {
+            // otherwise propagate to above
+            UnionSets(nodesLeaves[this->GetParent(i)], nodesLeaves[i]);
+        }
     }
-#endif
-  // Dump();
-}
-
-void MarginalTree ::Consolidate() {
-  // cout << "Before consolidate, tree = ";
-  // this->Dump();
-  // Remove degree-2 intermediate nodes
-  // first find out which nodes are those to be removed
-  set<int> nodesToDel;
-  // this is very simple: scan parent list
-  // if a node (non-leaf) only appears at most once of them, then remove it
-  vector<int> occurTimes;
-  vector<bool> nodeVisitedFlags;
-  for (int i = 0; i < GetTotNodesNum(); ++i) {
-    occurTimes.push_back(0);
-    nodeVisitedFlags.push_back(false);
-  }
-  stack<int> nodesToExplore;
-  for (int i = 0; i < GetNumLeaves(); ++i) {
-    nodesToExplore.push(i);
-  }
-  while (nodesToExplore.empty() == false) {
-    // find one node
-    int node = nodesToExplore.top();
-    nodesToExplore.pop();
-
-    // if this is already visited, skip
-    if (nodeVisitedFlags[node] == true) {
-      continue;
-    }
-    // this is a new node, so explore it
-    nodeVisitedFlags[node] = true;
-    int pp = GetParent(node);
-    if (pp >= 0) {
-      nodesToExplore.push(pp);
-      occurTimes[pp]++;
-    }
-  }
-  // now figure out how many to remove up to a point
-  vector<int> listNumDelItems;
-  for (int i = 0; i < GetNumLeaves(); ++i) {
-    listNumDelItems.push_back(0);
-  }
-  int numToDelete = 0;
-  for (int i = GetNumLeaves(); i < GetTotNodesNum(); ++i) {
-    if (occurTimes[i] <= 1 && i != GetTotNodesNum() - 1) {
-      numToDelete++;
-    }
-    listNumDelItems.push_back(numToDelete);
-  }
-
-  // now store a new set of items
-  vector<int> listNodeLabelsNew;
-  vector<int> listParentNodePosNew;
-  vector<double> listEdgeDistNew;
-  // now mark those with at most once to be deleted
-  for (int i = 0; i < GetTotNodesNum(); ++i) {
-    // leaves and the root is always there
-    if (occurTimes[i] > 1 || i < GetNumLeaves() || i == GetTotNodesNum() - 1) {
-      listNodeLabelsNew.push_back(listNodeLabels[i]);
-
-      // for parent, we trace upwards until either find a occur time > 1 or root
-      double distNew = listEdgeDist[i];
-      int parNew = GetParent(i);
-      // now trace back to see if we need them
-      while (occurTimes[parNew] <= 1 && parNew >= 0) {
-        int parNext = GetParent(parNew);
-        if (parNext < 0) {
-          break;
-        }
-        distNew += listEdgeDist[parNew];
-        parNew = parNext;
-      }
-
-      // save this (and make adjustment)
-      int parToSet = parNew - listNumDelItems[parNew];
-      if (parToSet < 0) {
-        parToSet = -1;
-      }
-      listParentNodePosNew.push_back(parToSet);
-      listEdgeDistNew.push_back(distNew);
-    }
-  }
-
-  // finally store this
-  listNodeLabels = listNodeLabelsNew;
-  listParentNodePos = listParentNodePosNew;
-  listEdgeDist = listEdgeDistNew;
-
-  // cout << "After consolidate, tree = ";
-  // this->Dump();
-}
-
-double MarginalTree ::GetEdgeLen(int childNodeIndex) const {
-  YW_ASSERT_INFO(childNodeIndex < (int)listEdgeDist.size(), "List overflow");
-  return listEdgeDist[childNodeIndex];
-}
-
-double MarginalTree ::GetTotEdgeLen() const {
-  //
-  double res = 0.0;
-  for (int i = 0; i < GetTotNodesNum(); ++i) {
-    if (i != GetRoot()) {
-      res += GetEdgeLen(i);
-    }
-  }
-  return res;
-}
-
-void MarginalTree ::ConsDecedentInfo(vector<vector<int> > &descNodes) const {
-  descNodes.clear();
-  int numNodes = GetTotNodesNum();
-  // vector< vector<int> > listDescendents;
-  for (int i = 0; i < numNodes; ++i) {
-    vector<int> emptyVec;
-    descNodes.push_back(emptyVec);
-  }
-  for (int i = 0; i < numNodes; ++i) {
-    int parpos = listParentNodePos[i];
-    if (parpos >= 0) {
-      descNodes[parpos].push_back(i);
-    }
-  }
-  // cout << "Descedents info:\n";
-  // for( unsigned int i=0; i<descNodes.size(); ++i)
-  //{
-  // DumpIntVec( descNodes[i] );
-  //}
-}
-
-void MarginalTree ::ConsAllDecedentInfo(vector<set<int> > &descNodes,
-                                        bool fIncSelf) const {
-  descNodes.clear();
-  int numNodes = GetTotNodesNum();
-  // vector< vector<int> > listDescendents;
-  for (int i = 0; i < numNodes; ++i) {
-    set<int> emptySet;
-    descNodes.push_back(emptySet);
-  }
-  for (int i = 0; i < numNodes; ++i) {
-    // Always contain itself if set
-    if (fIncSelf == true) {
-      descNodes[i].insert(i);
-    }
-
-    int parpos = listParentNodePos[i];
-    if (parpos >= 0) {
-      UnionSets(descNodes[parpos], descNodes[i]);
-      if (fIncSelf == false) {
-        // otherwise, we need to append this current node to
-        descNodes[parpos].insert(i);
-      }
-    }
-  }
-}
-
-void MarginalTree ::ConsDecedentLeavesInfo(vector<set<int> > &descLaves) const {
-  descLaves.clear();
-  // vector< vector<int> > listDescendents;
-  int numNodes = GetTotNodesNum();
-  for (int i = 0; i < numNodes; ++i) {
-    set<int> emptyVec;
-    descLaves.push_back(emptyVec);
-  }
-  for (int i = 0; i < numNodes; ++i) {
-    // If this is a leave, push itself into
-    if (i < numLeaves) {
-      descLaves[i].insert(i);
-    }
-
-    int parpos = listParentNodePos[i];
-    if (parpos >= 0) {
-      UnionSets(descLaves[parpos], descLaves[i]);
-    }
-  }
-  // cout << "Descedents info:\n";
-  // for( unsigned int i=0; i<descLaves.size(); ++i)
-  //{
-  // DumpIntSet( descLaves[i] );
-  //}
-}
-
-void MarginalTree ::ConsDecedentLeavesInfoLabels(
-    vector<set<int> > &leafNodeLabels) const {
-  //
-  leafNodeLabels.clear();
-  vector<set<int> > leafNodePos;
-  ConsDecedentLeavesInfo(leafNodePos);
-  for (int i = 0; i < (int)leafNodePos.size(); ++i) {
-    set<int> ss;
-    for (set<int>::const_iterator it = leafNodePos[i].begin();
-         it != leafNodePos[i].end(); ++it) {
-      ss.insert(GetLabel(*it));
-    }
-    leafNodeLabels.push_back(ss);
-  }
-}
-
-void MarginalTree ::FindAllSplits(vector<set<int> > &listSplits) const {
-  //
-  listSplits.clear();
-  // vector< vector<int> > listDescendents;
-  int numNodes = GetTotNodesNum();
-  for (int i = 0; i < numNodes; ++i) {
-    set<int> emptyVec;
-    listSplits.push_back(emptyVec);
-  }
-  for (int i = 0; i < numNodes; ++i) {
-    // If this is a leave, push itself into
-    if (i < numLeaves) {
-      listSplits[i].insert(GetLabel(i));
-    }
-
-    int parpos = listParentNodePos[i];
-    if (parpos >= 0) {
-      UnionSets(listSplits[parpos], listSplits[i]);
-    }
-  }
-}
-
-int MarginalTree ::GetParent(int child) const {
-  if (child >= GetTotNodesNum()) {
-    cout << "child = " << child << ", tot num of nodes = " << GetTotNodesNum()
-         << endl;
-  }
-  YW_ASSERT_INFO(child < GetTotNodesNum(), "Range bug");
-  return listParentNodePos[child];
-}
-
-void MarginalTree ::ConsHeightsInfo(vector<int> &nodesHt) const {
-  nodesHt.clear();
-  int numNodes = GetTotNodesNum();
-  for (int i = 0; i < numNodes; ++i) {
-    nodesHt.push_back(0);
-  }
-  for (int i = 0; i < numNodes; ++i) {
-    // test whether the parent node should be updated its height
-    int parpos = listParentNodePos[i];
-    if (parpos >= 0 && nodesHt[parpos] < nodesHt[i] + 1) {
-      nodesHt[parpos] = nodesHt[i] + 1;
-    }
-  }
-}
-
-void MarginalTree ::Dump() const {
-  // Output marginal tree states
-  cout << "Tree: number of leaves: " << numLeaves << endl;
-  cout << "Node list = ";
-  DumpIntVec(this->listNodeLabels);
-  cout << "Parent list = ";
-  DumpIntVec(this->listParentNodePos);
-  cout << "Tree dist = ";
-  DumpDoubleVec(this->listEdgeDist);
-}
-
-int MarginalTree ::GetPosForLabel(int lbl) const {
-  //
-  int res = -1;
-  for (int i = 0; i < (int)listNodeLabels.size(); ++i) {
-    if (listNodeLabels[i] == lbl) {
-      res = i;
-      break;
-    }
-  }
-  return res;
-}
-
-int MarginalTree ::GetMRCA(int v1, int v2) const {
-  // retrieve MRCA from it
-  // cout << "v1 = " << v1 << ", v2= " << v2 << endl;
-  int n1 = v1, n2 = v2;
-  while (n1 != n2) {
-    // we alternatively move up, depend on which one is smaller
-    if (n1 < n2) {
-      // move n1
-      n1 = GetParent(n1);
-    } else {
-      // move n2
-      n2 = GetParent(n2);
-    }
-    // cout << "GetMRCA1: n1 = " << n1 << ", n2 = " << n2 << endl;
-  }
-  // n1 (or n2) is the result)
-  return n1;
-}
-
-void MarginalTree ::GetChildren(int node, set<int> &listChildren) const {
-  listChildren.clear();
-
-  // we just search parent list to see who has entry equal to node
-  for (int i = 0; i < (int)listParentNodePos.size(); ++i) {
-    if (listParentNodePos[i] == node) {
-      listChildren.insert(i);
-    }
-  }
-}
-
-int MarginalTree ::GetMaxHt() const {
-  vector<int> heights;
-  ConsHeightsInfo(heights);
-  int maxHt = 0;
-  for (int i = 0; i < (int)heights.size(); ++i) {
-    if (maxHt < heights[i]) {
-      maxHt = heights[i];
-    }
-  }
-  return maxHt;
-}
-
-double MarginalTree ::GetHeight() const {
-  int root = GetRoot();
-  return GetHeightOfNode(root);
-}
-double MarginalTree ::GetHeightOfNode(int node) const {
-  // get descendent
-  int lchild = GetLeftDescendant(node);
-  int rchild = GetRightDescendant(node);
-  if (lchild < 0 || rchild < 0) {
-    return 0.0;
-  }
-  return max(GetEdgeLen(lchild) + GetHeightOfNode(lchild),
-             GetEdgeLen(rchild) + GetHeightOfNode(rchild));
-}
-
-void MarginalTree ::RemoveLeafNodeFromBinaryTree(int lfn) {
-  YW_ASSERT_INFO(IsLeaf(lfn) == true, "Not a leaf");
-  // rmeove a leaf node (and suppress the degree-2 node if so
-  // first fill in leaves
-  vector<int> listNodeLabelsNew;
-  vector<int> listParentNodePosNew;
-  int pp = GetParent(lfn);
-  for (int i = 0; i < GetTotNodesNum(); ++i) {
-    if (i != lfn && i != pp) {
-      listNodeLabelsNew.push_back(this->listNodeLabels[i]);
-
-      int parNew;
-      int oldPar = GetParent(i);
-      if (oldPar < pp) {
-        // just minus 1
-        parNew = oldPar - 1;
-      } else if (oldPar > pp) {
-        // otherwise, we lost two
-        parNew = oldPar - 2;
-      } else {
-        // In this case, we are pointing to pp, since pp is removed, we need to
-        // move up by one
-        parNew = GetParent(pp) - 2;
-      }
-      if (parNew < 0) {
-        parNew = -1;
-      }
-      listParentNodePosNew.push_back(parNew);
-    }
-  }
-  //
-  this->listNodeLabels = listNodeLabelsNew;
-  this->listParentNodePos = listParentNodePosNew;
-
-  this->numLeaves--;
-}
-
-bool MarginalTree ::AreTwoPathsDisjoint(int sn1, int en1, int sn2,
-                                        int en2) const {
-  // test whether two path (sn1, en1) and (sn2, en2) are (vertex) disjoint
-  // note that for binary tree, this is also checking for edge disjoint
-  // we use a dumb method here
-  set<int> nodesVisitedTree1;
-
-  int n1 = sn1, n2 = en1;
-  nodesVisitedTree1.insert(n1);
-  nodesVisitedTree1.insert(n2);
-  while (n1 != n2) {
-    // we alternatively move up, depend on which one is smaller
-    int nodeNew;
-    if (n1 < n2) {
-      // move n1
-      n1 = GetParent(n1);
-      nodeNew = n1;
-    } else {
-      // move n2
-      n2 = GetParent(n2);
-      nodeNew = n2;
+}
+
+int MarginalTree ::GetMRCAForNodes(const set<int> &listNodes) const
+{
+    // find mrca of a list of nodes
+    // we use a priority queue, each time, we try to find
+    priority_queue<int> queueNodesToCheck;
+    set<int> nodesVisited;
+
+    for (set<int>::iterator it = listNodes.begin(); it != listNodes.end(); ++it)
+    {
+        queueNodesToCheck.push((*it) * (-1));
     }
+    while (queueNodesToCheck.size() > 1)
+    {
+        int curn = -queueNodesToCheck.top();
+        queueNodesToCheck.pop();
 
-    //
-    nodesVisitedTree1.insert(nodeNew);
-  }
-  // cout << "Path 1=";
-  // DumpIntSet( nodesVisitedTree1 );
-  // now we move on to the next pair
-  n1 = sn2;
-  n2 = en2;
-  if (nodesVisitedTree1.find(n1) != nodesVisitedTree1.end() ||
-      nodesVisitedTree1.find(n2) != nodesVisitedTree1.end()) {
-    return false;
-  }
-  while (n1 != n2) {
-    // we alternatively move up, depend on which one is smaller
-    int nodeNew;
-    if (n1 < n2) {
-      // move n1
-      n1 = GetParent(n1);
-      nodeNew = n1;
-    } else {
-      // move n2
-      n2 = GetParent(n2);
-      nodeNew = n2;
+        // in case there are duplicate ones, remove these duplicate copies
+        // this can happen if one node is another node's parent
+        if (-queueNodesToCheck.top() == curn)
+        {
+            // don't work on this, wait for the next one
+            continue;
+        }
+
+        // is this visited
+        int pp = this->GetParent(curn);
+        //cout << "Processing curn: " << curn << ", parent: " << pp << endl;
+        if (nodesVisited.find(pp) == nodesVisited.end())
+        {
+            // new node
+            nodesVisited.insert(pp);
+            // push to queue
+            queueNodesToCheck.push(-1 * pp);
+        }
     }
+    int res = -1 * queueNodesToCheck.top();
+    return res;
+}
 
+bool MarginalTree ::IsNodeUnder(int nn, int ancesNode) const
+{
     //
-    if (nodesVisitedTree1.find(nodeNew) != nodesVisitedTree1.end()) {
-      return false;
+    if (nn > ancesNode)
+    {
+        return false;
+    }
+    int curn = nn;
+    while (curn < ancesNode && curn >= 0)
+    {
+        curn = this->GetParent(curn);
+    }
+    if (curn == ancesNode)
+    {
+        return true;
+    }
+    else
+    {
+        return false;
     }
-  }
-
-  return true;
 }
 
-int MarginalTree ::GetPath(int sn, int en, set<int> &edgesOnPath) const {
-  // find edges on the path, and return the MRCA
-  int n1 = sn, n2 = en;
-  edgesOnPath.insert(n1);
-  edgesOnPath.insert(n2);
-  while (n1 != n2) {
-    // we alternatively move up, depend on which one is smaller
-    int nodeNew;
-    if (n1 < n2) {
-      // move n1
-      n1 = GetParent(n1);
-      nodeNew = n1;
-    } else {
-      // move n2
-      n2 = GetParent(n2);
-      nodeNew = n2;
+void MarginalTree ::RandPermuateLeaves()
+{
+    // randomly permuate the leaves of the tree
+    // we do this by shuffeling the parent of the leaves
+    vector<int> parentsNewIndices;
+    GetRandVector(parentsNewIndices, 0, GetNumLeaves() - 1);
+    //cout << "Dump Random vector: ";
+    //DumpIntVec( parentsNewIndices );
+    // now shuffling it
+    vector<int> leavesParNew;
+    for (int i = 0; i < (int)parentsNewIndices.size(); ++i)
+    {
+        leavesParNew.push_back(GetParent(parentsNewIndices[i]));
     }
+    // now assign it
+    for (int i = 0; i < (int)parentsNewIndices.size(); ++i)
+    {
+        SetParent(i, leavesParNew[i]);
+    }
+}
+
+int MarginalTree ::GetTriple(int i, int j, int k) const
+{
+    // ensure order of a,b,c first
+    OrderInt(i, j);
+    OrderInt(i, k);
+    OrderInt(j, k);
 
     //
-    edgesOnPath.insert(nodeNew);
-  }
-  // remove MRCA from result
-  YW_ASSERT_INFO(edgesOnPath.find(n1) != edgesOnPath.end(), "wrong2");
-  edgesOnPath.erase(n1);
-
-  return n1;
-}
-
-double MarginalTree ::GetPathLen(int sn, int en) {
-  // get the branch lenggth on the path
-  double res = 0.0;
-
-  set<int> edgesOnPath;
-  int mrca = GetPath(sn, en, edgesOnPath);
-  YW_ASSERT_INFO(edgesOnPath.find(mrca) == edgesOnPath.end(), "Fail to find");
-  for (set<int>::iterator it = edgesOnPath.begin(); it != edgesOnPath.end();
-       ++it) {
-    res += GetEdgeLen(*it);
-  }
-  return res;
-}
-
-void MarginalTree ::OutputGML(const char *fileName) const {
-  // Now output a file in GML format
-  // First create a new name
-  string name = fileName;
-  // cout << "num edges = " << listEdges.size() << endl;
-
-  DEBUG("FileName=");
-  DEBUG(name);
-  DEBUG("\n");
-  // Now open file to write out
-  ofstream outFile(name.c_str());
-
-  // First output some header info
-  outFile << "graph [\n";
-  outFile << "comment ";
-  OutputQuotedString(outFile, "Automatically generated by Graphing tool");
-  outFile << "\ndirected  1\n";
-  outFile << "id  1\n";
-  outFile << "label ";
-  OutputQuotedString(outFile, "Marginal Tree....\n");
-
-  // Now output all the vertices
-  //	int i;
-
-  // cout << "a.1.1\n";
-  for (int i = 0; i < (int)listNodeLabels.size(); ++i) {
-    outFile << "node [\n";
-
-    outFile << "id " << i << endl;
-    outFile << "label ";
-    char buf[80];
-    //        sprintf(buf, "n%d",  listNodeLabels[i]  );
-    sprintf(buf, "n%d", i);
+    // is these have different triples on T1 and T2?
+    // we do this by getting MRCA for all pairs of MRCAs
+    int mrcaij1 = GetMRCA(i, j);
+    int mrcajk1 = GetMRCA(j, k);
+    int mrcaik1 = GetMRCA(i, k);
+
+    // now just test exhustively
+    if (mrcaij1 == mrcajk1)
+    {
+        return 3;
+    }
+    else if (mrcaij1 == mrcaik1)
+    {
+        return 2;
+    }
+    else
+    {
+        return 1;
+    }
+}
 
-    OutputQuotedString(outFile, buf);
-    outFile << endl;
+int MarginalTree ::GetSibling(int a) const
+{
+    // get sibling of the node (leaf or non-leaf)
+    int par = GetParent(a);
+    int lc = GetLeftDescendant(par);
+    int rc = GetRightDescendant(par);
 
-    // See if we need special shape here
-    outFile << "defaultAtrribute   1\n";
+    YW_ASSERT_INFO(a == lc || a == rc, "Very wrong");
+    if (a == lc)
+    {
+        return rc;
+    }
+    else
+    {
+        return lc;
+    }
+}
 
-    outFile << "]\n";
-  }
-  // cout << "a.1.3\n";
+bool MarginalTree ::AreNodesSibling(int a, int b) const
+{
+    //
+    return GetSibling(a) == b;
+}
 
-  // Now output all the edges, by again starting from root and output all nodes
-  for (int i = 0; i < (int)listParentNodePos.size(); ++i) {
-    int parpos = listParentNodePos[i];
+void MarginalTree ::SortByLeafId()
+{
+    // sort based on leaf id. That is, leaf ids = 0,1,2,3,.. in the list
+    vector<int> listNodeLabelsNew(this->listNodeLabels.size());
+    vector<int> listParentNodePosNew(this->listParentNodePos.size());
+    vector<double> listEdgeDistNew(this->listEdgeDist.size());
+
+    listNodeLabelsNew = listNodeLabels;
+    listParentNodePosNew = listParentNodePos;
+    listEdgeDistNew = listEdgeDist;
+
+    // now sort and swap the leaf part
+    // collect leaves
+    vector<int> listLeafIds;
+    for (int i = 0; i < GetNumLeaves(); ++i)
+    {
+        listLeafIds.push_back(listNodeLabels[i]);
+    }
+    //vector<int> listLeafIdsOld = listLeafIds;
+    SortIntVec(listLeafIds);
+    //cout << "listLeafIds = ";
+    //DumpIntVec( listLeafIds );
+    // create a map
+    //map<int,int> mapLeafIdToOldPos;
+    //for( int i=0; i<(int)listLeafIdsOld.size(); ++i )
+    //{
+    //    mapLeafIdToOldPos.insert(map<int,int> :: value_type(listLeafIdsOld[i],i) );
+    //}
+    map<int, int> mapLeafIdToNewPos;
+    for (int i = 0; i < (int)listLeafIds.size(); ++i)
+    {
+        //cout << "Set map from id " << listLeafIds[i] << " to position " << i << endl;
+        mapLeafIdToNewPos.insert(map<int, int>::value_type(listLeafIds[i], i));
+    }
+    // now swap the info in each in the old list
+    for (int i = 0; i < (int)GetNumLeaves(); ++i)
+    {
+        int vid = listNodeLabels[i];
+        YW_ASSERT_INFO(mapLeafIdToNewPos.find(vid) != mapLeafIdToNewPos.end(), "FAIL to find");
+        int posNew = mapLeafIdToNewPos[vid];
+        //cout << "vid = " << vid << ", Set " << posNew << " to position " << i << endl;
+        listNodeLabelsNew[posNew] = vid;
+        listParentNodePosNew[posNew] = listParentNodePos[i];
+        listEdgeDistNew[posNew] = listEdgeDist[i];
+    }
 
-    // cout << "Output an edge \n";
-    outFile << "edge [\n";
-    outFile << "source " << parpos << endl;
-    outFile << "target  " << i << endl;
-    outFile << "label ";
-    OutputQuotedString(outFile, "");
-    outFile << "\n";
-    outFile << "]\n";
-  }
-
-  // Finally quite after closing file
-  outFile << "\n]\n";
-  outFile.close();
-}
-
-string MarginalTree ::GetNewick() const {
-  // return the newick format of the tree (with length)
-  // method: just get the newick at the root node
-  return GetNewickAt(GetTotNodesNum() - 1);
-}
-string MarginalTree ::GetNewickSorted(bool fLen) const {
-  //
-  return GetNewickAt(GetTotNodesNum() - 1, true, fLen);
-}
-
-string MarginalTree ::GetNewickAt(int node, bool fSort, bool fLen) const {
-  // find its descendents
-  string res;
-  int childLeft = GetLeftDescendant(node);
-  int childRight = GetRightDescendant(node);
-  if (childLeft < 0) {
-    // must be leaf
-    YW_ASSERT_INFO(IsLeaf(node) == true, "Wrong node in MT");
-    // for leaf, only ouput its label together with its length
-    char buf[100];
-    if (fLen == true) {
-      sprintf(buf, "%d:%f", GetLabel(node), GetEdgeLen(node));
-    } else {
-      sprintf(buf, "%d", GetLabel(node));
-    }
-    res = buf;
-  } else {
-    // append two children's
-    if (childRight < 0) {
-      Dump();
-    }
-    YW_ASSERT_INFO(childRight >= 0, "Left/right mismatch");
-    res = "(";
-    // res += GetNewickAt(childLeft);
-    // res +=",";
-    // res += GetNewickAt(childRight);
-    string strPart1 = GetNewickAt(childLeft, fSort, fLen);
-    string strPart2 = GetNewickAt(childRight, fSort, fLen);
-    string strToAdd;
-    if (fSort == false || strPart1 <= strPart2) {
-      res += strPart1;
-      res += ",";
-      res += strPart2;
-    } else {
-      res += strPart2;
-      res += ",";
-      res += strPart1;
-    }
-    res += strToAdd;
-    res += ")";
-    if (fLen == true && node < GetTotNodesNum() - 1) {
-      char buf[100];
-      sprintf(buf, ":%f", GetEdgeLen(node));
-      res += buf;
-    }
-  }
-  return res;
-}
-
-void MarginalTree ::GetLeavesUnder(int nn, set<int> &leavesUnder) const {
-  //
-  if (IsLeaf(nn) == true) {
-    leavesUnder.insert(nn);
-  } else {
-    set<int> listChildren;
-    GetChildren(nn, listChildren);
-    for (set<int>::iterator it = listChildren.begin(); it != listChildren.end();
-         ++it) {
-      GetLeavesUnder(*it, leavesUnder);
-    }
-  }
-}
-
-void MarginalTree ::GetlabelsFor(const set<int> &setPos,
-                                 set<int> &setLbls) const {
-  //
-  setLbls.clear();
-  for (set<int>::const_iterator it = setPos.begin(); it != setPos.end(); ++it) {
-    setLbls.insert(GetLabel(*it));
-  }
-}
-
-void MarginalTree ::GetLeafSetsForCuts(const vector<int> &listCuts,
-                                       vector<set<int> > &listLeafSets) const {
-  // this function finds the cutted subtrees' leaf sets for the given set of cut
-  // edges
-  listLeafSets.clear();
-
-  // we first create a map of whether an edge mutate or not
-  vector<bool> mapEdgeMutFlags;
-  for (int i = 0; i < this->GetTotNodesNum(); ++i) {
-    mapEdgeMutFlags.push_back(false);
-  }
-  for (int i = 0; i < (int)listCuts.size(); ++i) {
-    mapEdgeMutFlags[listCuts[i]] = true;
-  }
-
-  // we start by bottom up way to traversal all nodes
-  vector<set<int> > nodesLeaves(this->GetTotNodesNum());
-  for (int i = 0; i < this->GetNumLeaves(); ++i) {
-    // all leave nodes are trivial
-    nodesLeaves[i].insert(i);
-  }
-  // test for all nodes
-  for (int i = 0; i < this->GetTotNodesNum(); ++i) {
-    // if the edge is cut, we have found an partition or it is a root
-    if (mapEdgeMutFlags[i] == true || i == this->GetTotNodesNum() - 1) {
-      if (nodesLeaves[i].size() > 0) {
-        // cout << "Found one partition: ";
-        // DumpIntSet( nodesLeaves[i] );
-        listLeafSets.push_back(nodesLeaves[i]);
-      }
-    } else {
-      // otherwise propagate to above
-      UnionSets(nodesLeaves[this->GetParent(i)], nodesLeaves[i]);
-    }
-  }
-}
-
-int MarginalTree ::GetMRCAForNodes(const set<int> &listNodes) const {
-  // find mrca of a list of nodes
-  // we use a priority queue, each time, we try to find
-  priority_queue<int> queueNodesToCheck;
-  set<int> nodesVisited;
-
-  for (set<int>::iterator it = listNodes.begin(); it != listNodes.end(); ++it) {
-    queueNodesToCheck.push((*it) * (-1));
-  }
-  while (queueNodesToCheck.size() > 1) {
-    int curn = -queueNodesToCheck.top();
-    queueNodesToCheck.pop();
-
-    // in case there are duplicate ones, remove these duplicate copies
-    // this can happen if one node is another node's parent
-    if (-queueNodesToCheck.top() == curn) {
-      // don't work on this, wait for the next one
-      continue;
-    }
-
-    // is this visited
-    int pp = this->GetParent(curn);
-    // cout << "Processing curn: " << curn << ", parent: " << pp << endl;
-    if (nodesVisited.find(pp) == nodesVisited.end()) {
-      // new node
-      nodesVisited.insert(pp);
-      // push to queue
-      queueNodesToCheck.push(-1 * pp);
-    }
-  }
-  int res = -1 * queueNodesToCheck.top();
-  return res;
-}
-
-bool MarginalTree ::IsNodeUnder(int nn, int ancesNode) const {
-  //
-  if (nn > ancesNode) {
-    return false;
-  }
-  int curn = nn;
-  while (curn < ancesNode && curn >= 0) {
-    curn = this->GetParent(curn);
-  }
-  if (curn == ancesNode) {
-    return true;
-  } else {
-    return false;
-  }
-}
-
-void MarginalTree ::RandPermuateLeaves() {
-  // randomly permuate the leaves of the tree
-  // we do this by shuffeling the parent of the leaves
-  vector<int> parentsNewIndices;
-  GetRandVector(parentsNewIndices, 0, GetNumLeaves() - 1);
-  // cout << "Dump Random vector: ";
-  // DumpIntVec( parentsNewIndices );
-  // now shuffling it
-  vector<int> leavesParNew;
-  for (int i = 0; i < (int)parentsNewIndices.size(); ++i) {
-    leavesParNew.push_back(GetParent(parentsNewIndices[i]));
-  }
-  // now assign it
-  for (int i = 0; i < (int)parentsNewIndices.size(); ++i) {
-    SetParent(i, leavesParNew[i]);
-  }
-}
-
-int MarginalTree ::GetTriple(int i, int j, int k) const {
-  // ensure order of a,b,c first
-  OrderInt(i, j);
-  OrderInt(i, k);
-  OrderInt(j, k);
-
-  //
-  // is these have different triples on T1 and T2?
-  // we do this by getting MRCA for all pairs of MRCAs
-  int mrcaij1 = GetMRCA(i, j);
-  int mrcajk1 = GetMRCA(j, k);
-  int mrcaik1 = GetMRCA(i, k);
-
-  // now just test exhustively
-  if (mrcaij1 == mrcajk1) {
-    return 3;
-  } else if (mrcaij1 == mrcaik1) {
-    return 2;
-  } else {
-    return 1;
-  }
-}
-
-int MarginalTree ::GetSibling(int a) const {
-  // get sibling of the node (leaf or non-leaf)
-  int par = GetParent(a);
-  int lc = GetLeftDescendant(par);
-  int rc = GetRightDescendant(par);
-
-  YW_ASSERT_INFO(a == lc || a == rc, "Very wrong");
-  if (a == lc) {
-    return rc;
-  } else {
-    return lc;
-  }
-}
-
-bool MarginalTree ::AreNodesSibling(int a, int b) const {
-  //
-  return GetSibling(a) == b;
-}
-
-void MarginalTree ::SortByLeafId() {
-  // sort based on leaf id. That is, leaf ids = 0,1,2,3,.. in the list
-  vector<int> listNodeLabelsNew(this->listNodeLabels.size());
-  vector<int> listParentNodePosNew(this->listParentNodePos.size());
-  vector<double> listEdgeDistNew(this->listEdgeDist.size());
-
-  listNodeLabelsNew = listNodeLabels;
-  listParentNodePosNew = listParentNodePos;
-  listEdgeDistNew = listEdgeDist;
-
-  // now sort and swap the leaf part
-  // collect leaves
-  vector<int> listLeafIds;
-  for (int i = 0; i < GetNumLeaves(); ++i) {
-    listLeafIds.push_back(listNodeLabels[i]);
-  }
-  // vector<int> listLeafIdsOld = listLeafIds;
-  SortIntVec(listLeafIds);
-  // cout << "listLeafIds = ";
-  // DumpIntVec( listLeafIds );
-  // create a map
-  // map<int,int> mapLeafIdToOldPos;
-  // for( int i=0; i<(int)listLeafIdsOld.size(); ++i )
-  //{
-  //    mapLeafIdToOldPos.insert(map<int,int> :: value_type(listLeafIdsOld[i],i)
-  //    );
-  //}
-  map<int, int> mapLeafIdToNewPos;
-  for (int i = 0; i < (int)listLeafIds.size(); ++i) {
-    // cout << "Set map from id " << listLeafIds[i] << " to position " << i <<
-    // endl;
-    mapLeafIdToNewPos.insert(map<int, int>::value_type(listLeafIds[i], i));
-  }
-  // now swap the info in each in the old list
-  for (int i = 0; i < (int)GetNumLeaves(); ++i) {
-    int vid = listNodeLabels[i];
-    YW_ASSERT_INFO(mapLeafIdToNewPos.find(vid) != mapLeafIdToNewPos.end(),
-                   "FAIL to find");
-    int posNew = mapLeafIdToNewPos[vid];
-    // cout << "vid = " << vid << ", Set " << posNew << " to position " << i <<
-    // endl;
-    listNodeLabelsNew[posNew] = vid;
-    listParentNodePosNew[posNew] = listParentNodePos[i];
-    listEdgeDistNew[posNew] = listEdgeDist[i];
-  }
-
-#if 0 // there is some issues with this piece of code: namely, it can not deal
-      // with non-distinct id in trees properly. Although ids are expected to be
-      // distinct but sometime they are not; so change it on 8/13/13
-      // list leaf in order
+#if 0 // there are some issues with this piece of code: namely, it cannot deal with non-distinct ids in trees properly. Although ids are expected to be distinct, sometimes they are not; so this was changed on 8/13/13
+	// list leaf in order
 	vector<int> listLeaves = this->listNodeLabels;
 	SortIntVec(listLeaves);
 cout << "after sorting, leaf list = ";
@@ -2239,147 +2457,156 @@ DumpIntVec( listLeaves);
 		}
 	}
 #endif
-  // now write back
-  this->listNodeLabels = listNodeLabelsNew;
-  this->listParentNodePos = listParentNodePosNew;
-  this->listEdgeDist = listEdgeDistNew;
-
-  // redo the descendents
-  BuildDescendantInfo();
-}
-
-void MarginalTree ::FixDupIds() {
-  // remove redundent ids with something new
-  // sort based on leaf id. That is, leaf ids = 0,1,2,3,.. in the list
-  // also, keep the leaf and internal nodes id separated
-  vector<int> listNodeLabelsNew(this->listNodeLabels.size());
-  int numLeaves = GetNumLeaves();
-
-  // list leaf in order
-  set<int> setNids;
-  PopulateSetByVec(setNids, this->listNodeLabels);
-  int idNext = *(setNids.rbegin()) + 1;
-
-  set<int> idsSeenBefore;
-
-  for (int i = 0; i < (int)this->listNodeLabels.size(); ++i) {
-    // keep a sorted list
-    int lvid = this->listNodeLabels[i];
-    if (idsSeenBefore.find(lvid) != idsSeenBefore.end()) {
-      lvid = idNext++;
-    }
-    listNodeLabelsNew[i] = lvid;
-    idsSeenBefore.insert(lvid);
-  }
-
-  // now inc the id of the internal nodes
-  for (int i = numLeaves; i < (int)listNodeLabelsNew.size(); ++i) {
-    listNodeLabelsNew[i] += 3 * numLeaves;
-  }
-
-  // now write back
-  this->listNodeLabels = listNodeLabelsNew;
-}
-
-void MarginalTree ::RearrangeParIncOrder() {
-  // cout << "--RearrangeParIncOrder:\n";
-  // sometimes the parent position is out of order, say 1,3,3,2,2,1,...
-  // we can rearrange the internal node so that it becomes 1,2,2,3,3,1...
-  // check the order of the appreance of the parent node
-  // CAUTION: after this, need to perform descendent list rebuilt
-
-  //#if 0
-  int curParOrderIndex = GetNumLeaves();
-  map<int, int> mapCurParPosToNewParPos;
-  set<int> setSeePars;
-  queue<int> nodesToProc;
-  // add in the leaves first
-  // vector<int> parposListNew(listParentNodePos.size() );
-  // // the new par and dist list parposListNew[ parposListNew.size()-1 ] = -1;
-  // // the last one is always -1 vector<double> distListNew(listEdgeDist.size()
-  // );
-  for (int i = 0; i < GetNumLeaves(); ++i) {
-    nodesToProc.push(i);
-  }
-  vector<int> listNewParsInOrder;
-  while (nodesToProc.empty() == false) {
-    int nodeCur = nodesToProc.front();
-    nodesToProc.pop();
-    int parpos = GetParent(nodeCur);
-    if (parpos < 0) {
-      // root, do nothing
-      continue;
-    }
-    // cout << "nodecur = " << nodeCur << ", parpos = " << parpos << endl;
-    if (setSeePars.find(parpos) == setSeePars.end()) {
-      // add to set and move to next
-      setSeePars.insert(parpos);
-    } else {
-      // have seen before, so record the mapping
-      YW_ASSERT_INFO(mapCurParPosToNewParPos.find(parpos) ==
-                         mapCurParPosToNewParPos.end(),
-                     "Should not be here");
-      ;
-      mapCurParPosToNewParPos.insert(
-          map<int, int>::value_type(parpos, curParOrderIndex));
-      // cout << "map old pos " << parpos << " to " << curParOrderIndex << endl;
-      ++curParOrderIndex;
-      // when a parent node is done, process it
-      nodesToProc.push(parpos);
-
-      listNewParsInOrder.push_back(parpos);
-    }
-  }
-
-  // now swap the par positions
-  vector<int> parposListNew;
-  vector<double> distListNew = listEdgeDist;
-  for (int ii = 0; ii < (int)GetNumLeaves(); ++ii) {
-    YW_ASSERT_INFO(mapCurParPosToNewParPos.find(listParentNodePos[ii]) !=
-                       mapCurParPosToNewParPos.end(),
-                   "False");
-    parposListNew.push_back(mapCurParPosToNewParPos[listParentNodePos[ii]]);
-  }
-  // then output the internal node in the given order
-  for (int ii = 0; ii < (int)listNewParsInOrder.size(); ++ii) {
-    int nindex = listNewParsInOrder[ii];
-    if (mapCurParPosToNewParPos.find(listParentNodePos[nindex]) !=
-        mapCurParPosToNewParPos.end()) {
-      parposListNew.push_back(
-          mapCurParPosToNewParPos[listParentNodePos[nindex]]);
-      // cout << "set dist of edge " << nindex << " (old dist " <<
-      // distListNew[nindex] << " to node " << mapCurParPosToNewParPos[ nindex ]
-      // << " w/ dist "; cout << listEdgeDist[ mapCurParPosToNewParPos[ nindex ]
-      // ] << endl;
-      distListNew[nindex] = listEdgeDist[mapCurParPosToNewParPos[nindex]];
-    } else {
-      parposListNew.push_back(-1);
-    }
-  }
-
-  // finally set up the new lists
-  this->listParentNodePos = parposListNew;
-  this->listEdgeDist = distListNew;
-  //#endif
-}
-
-string MarginalTree ::GetNewickNoBrLen() const {
-  // get the newick format w/o branch length
-  string strCurr = this->GetNewick();
-  PhylogenyTreeBasic trPhy;
-  trPhy.ConsOnNewick(strCurr);
-  trPhy.Order();
-  string res;
-  trPhy.ConsNewick(res);
-  return res;
-}
-
-string MarginalTree ::GetNewickNoBrLen2() const {
-  //
-  return GetNewickAt(GetTotNodesNum() - 1, true, false);
-}
-
-void MarginalTree ::RemapLeafLabels(const map<int, int> &mapLeafLblsToNew) {
+    // now write back
+    this->listNodeLabels = listNodeLabelsNew;
+    this->listParentNodePos = listParentNodePosNew;
+    this->listEdgeDist = listEdgeDistNew;
+
+    // rebuild the descendant info
+    BuildDescendantInfo();
+}
+
+void MarginalTree ::FixDupIds()
+{
+    // replace duplicate (redundant) ids with new, previously unused ids
+    // sort based on leaf id. That is, leaf ids = 0,1,2,3,.. in the list
+    // also, keep the leaf and internal nodes id separated
+    vector<int> listNodeLabelsNew(this->listNodeLabels.size());
+    int numLeaves = GetNumLeaves();
+
+    // list leaf in order
+    set<int> setNids;
+    PopulateSetByVec(setNids, this->listNodeLabels);
+    int idNext = *(setNids.rbegin()) + 1;
+
+    set<int> idsSeenBefore;
+
+    for (int i = 0; i < (int)this->listNodeLabels.size(); ++i)
+    {
+        // keep a sorted list
+        int lvid = this->listNodeLabels[i];
+        if (idsSeenBefore.find(lvid) != idsSeenBefore.end())
+        {
+            lvid = idNext++;
+        }
+        listNodeLabelsNew[i] = lvid;
+        idsSeenBefore.insert(lvid);
+    }
+
+    // now inc the id of the internal nodes
+    for (int i = numLeaves; i < (int)listNodeLabelsNew.size(); ++i)
+    {
+        listNodeLabelsNew[i] += 3 * numLeaves;
+    }
+
+    // now write back
+    this->listNodeLabels = listNodeLabelsNew;
+}
+
+void MarginalTree ::RearrangeParIncOrder()
+{
+    //cout << "--RearrangeParIncOrder:\n";
+    // sometimes the parent position is out of order, say 1,3,3,2,2,1,...
+    // we can rearrange the internal node so that it becomes 1,2,2,3,3,1...
+    // check the order of the appearance of the parent node
+    // CAUTION: after this, the descendant lists need to be rebuilt
+
+    //#if 0
+    int curParOrderIndex = GetNumLeaves();
+    map<int, int> mapCurParPosToNewParPos;
+    set<int> setSeePars;
+    queue<int> nodesToProc;
+    // add in the leaves first
+    //vector<int> parposListNew(listParentNodePos.size() );				// the new par and dist list
+    //parposListNew[ parposListNew.size()-1 ] = -1;			// the last one is always -1
+    //vector<double> distListNew(listEdgeDist.size() );
+    for (int i = 0; i < GetNumLeaves(); ++i)
+    {
+        nodesToProc.push(i);
+    }
+    vector<int> listNewParsInOrder;
+    while (nodesToProc.empty() == false)
+    {
+        int nodeCur = nodesToProc.front();
+        nodesToProc.pop();
+        int parpos = GetParent(nodeCur);
+        if (parpos < 0)
+        {
+            // root, do nothing
+            continue;
+        }
+        //cout << "nodecur = " << nodeCur << ", parpos = " << parpos << endl;
+        if (setSeePars.find(parpos) == setSeePars.end())
+        {
+            // add to set and move to next
+            setSeePars.insert(parpos);
+        }
+        else
+        {
+            // have seen before, so record the mapping
+            YW_ASSERT_INFO(mapCurParPosToNewParPos.find(parpos) == mapCurParPosToNewParPos.end(), "Should not be here");
+            ;
+            mapCurParPosToNewParPos.insert(map<int, int>::value_type(parpos, curParOrderIndex));
+            //cout << "map old pos " << parpos << " to " << curParOrderIndex << endl;
+            ++curParOrderIndex;
+            // when a parent node is done, process it
+            nodesToProc.push(parpos);
+
+            listNewParsInOrder.push_back(parpos);
+        }
+    }
+
+    // now swap the par positions
+    vector<int> parposListNew;
+    vector<double> distListNew = listEdgeDist;
+    for (int ii = 0; ii < (int)GetNumLeaves(); ++ii)
+    {
+        YW_ASSERT_INFO(mapCurParPosToNewParPos.find(listParentNodePos[ii]) != mapCurParPosToNewParPos.end(), "False");
+        parposListNew.push_back(mapCurParPosToNewParPos[listParentNodePos[ii]]);
+    }
+    // then output the internal node in the given order
+    for (int ii = 0; ii < (int)listNewParsInOrder.size(); ++ii)
+    {
+        int nindex = listNewParsInOrder[ii];
+        if (mapCurParPosToNewParPos.find(listParentNodePos[nindex]) != mapCurParPosToNewParPos.end())
+        {
+            parposListNew.push_back(mapCurParPosToNewParPos[listParentNodePos[nindex]]);
+            //cout << "set dist of edge " << nindex << " (old dist " << distListNew[nindex] << " to node " << mapCurParPosToNewParPos[ nindex ] << " w/ dist ";
+            //cout << listEdgeDist[ mapCurParPosToNewParPos[ nindex ] ] << endl;
+            distListNew[nindex] = listEdgeDist[mapCurParPosToNewParPos[nindex]];
+        }
+        else
+        {
+            parposListNew.push_back(-1);
+        }
+    }
+
+    // finally set up the new lists
+    this->listParentNodePos = parposListNew;
+    this->listEdgeDist = distListNew;
+    //#endif
+}
+
+string MarginalTree ::GetNewickNoBrLen() const
+{
+    // get the newick format w/o branch length
+    string strCurr = this->GetNewick();
+    PhylogenyTreeBasic trPhy;
+    trPhy.ConsOnNewick(strCurr);
+    trPhy.Order();
+    string res;
+    trPhy.ConsNewick(res);
+    return res;
+}
+
+string MarginalTree ::GetNewickNoBrLen2() const
+{
+    //
+    return GetNewickAt(GetTotNodesNum() - 1, true, false);
+}
+
+void MarginalTree ::RemapLeafLabels(const map<int, int> &mapLeafLblsToNew)
+{
 #if 0
 cout << "RemapLeafLabels: ";
 this->Dump();
@@ -2391,158 +2618,171 @@ cout << "[" << it->first << "," << it->second << "]  ";
 cout << endl;
 this->Dump();
 #endif
-  // convert each existing labels to consecutive labels e.g. 0, 1, 2, ...
-  for (int i = 0; i < (int)listNodeLabels.size(); ++i) {
-    int lblCur = listNodeLabels[i];
-    // cout << "lblCur: " << lblCur << endl;
-    YW_ASSERT_INFO(lblCur < 0 ||
-                       mapLeafLblsToNew.find(lblCur) != mapLeafLblsToNew.end(),
-                   "Fail to find123");
-    if (lblCur >= 0) {
-      listNodeLabels[i] = (*(mapLeafLblsToNew.find(lblCur))).second;
-    }
-  }
-  // rebuild descendent info
-  BuildDescendantInfo();
-}
-
-void MarginalTree ::MapLeafLblConsecutiveOrder(vector<int> &listLeafLblsOld) {
-  listLeafLblsOld.clear();
-  int idNext = 0;
-  MapLeafLblConsecutiveOrderAt(this->GetRoot(), idNext, listLeafLblsOld);
-  // adding the remaining internal nodes
-  for (int i = GetNumLeaves(); i < GetTotNodesNum(); ++i) {
-    listLeafLblsOld.push_back(GetLabel(i));
-    SetLabel(i, idNext);
-    ++idNext;
-  }
-}
-
-void MarginalTree ::MapLeafLblConsecutiveOrderAt(int rootST, int &idNext,
-                                                 vector<int> &listLeafLblsOld) {
-  if (IsLeaf(rootST)) {
-    listLeafLblsOld.push_back(GetLabel(rootST));
-    SetLabel(rootST, idNext);
-    ++idNext;
-  } else {
-    MapLeafLblConsecutiveOrderAt(GetLeftDescendant(rootST), idNext,
-                                 listLeafLblsOld);
-    MapLeafLblConsecutiveOrderAt(GetRightDescendant(rootST), idNext,
-                                 listLeafLblsOld);
-  }
-}
-
-void MarginalTree ::ResetIncLabel() {
-  //
-  for (int i = 0; i < GetNumLeaves(); ++i) {
-    listNodeLabels[i] = i;
-  }
-}
-
-void MarginalTree ::IncLabels() {
-  for (int i = 0; i < GetNumLeaves(); ++i) {
-    ++listNodeLabels[i];
-  }
-}
-
-void MarginalTree ::FindSibLeafPairs(
-    vector<pair<int, int> > &listSibPairs) const {
-  // cout << "FindSibLeafPairs:\n";
-  // Dump();
-  // find leaves that are siblings (return the index (note not label) of the sib
-  // pairs)
-  for (int i = GetNumLeaves(); i < GetTotNodesNum(); ++i) {
+    // convert each existing (non-negative) label to its new label from mapLeafLblsToNew (intended to be consecutive, e.g. 0, 1, 2, ...)
+    for (int i = 0; i < (int)listNodeLabels.size(); ++i)
+    {
+        int lblCur = listNodeLabels[i];
+        //cout << "lblCur: " << lblCur << endl;
+        YW_ASSERT_INFO(lblCur < 0 || mapLeafLblsToNew.find(lblCur) != mapLeafLblsToNew.end(), "Fail to find123");
+        if (lblCur >= 0)
+        {
+            listNodeLabels[i] = (*(mapLeafLblsToNew.find(lblCur))).second;
+        }
+    }
+    // rebuild descendant info
+    BuildDescendantInfo();
+}
+
+void MarginalTree ::MapLeafLblConsecutiveOrder(vector<int> &listLeafLblsOld)
+{
+    listLeafLblsOld.clear();
+    int idNext = 0;
+    MapLeafLblConsecutiveOrderAt(this->GetRoot(), idNext, listLeafLblsOld);
+    // adding the remaining internal nodes
+    for (int i = GetNumLeaves(); i < GetTotNodesNum(); ++i)
+    {
+        listLeafLblsOld.push_back(GetLabel(i));
+        SetLabel(i, idNext);
+        ++idNext;
+    }
+}
+
+void MarginalTree ::MapLeafLblConsecutiveOrderAt(int rootST, int &idNext, vector<int> &listLeafLblsOld)
+{
+    if (IsLeaf(rootST))
+    {
+        listLeafLblsOld.push_back(GetLabel(rootST));
+        SetLabel(rootST, idNext);
+        ++idNext;
+    }
+    else
+    {
+        MapLeafLblConsecutiveOrderAt(GetLeftDescendant(rootST), idNext, listLeafLblsOld);
+        MapLeafLblConsecutiveOrderAt(GetRightDescendant(rootST), idNext, listLeafLblsOld);
+    }
+}
+
+void MarginalTree ::ResetIncLabel()
+{
+    //
+    for (int i = 0; i < GetNumLeaves(); ++i)
+    {
+        listNodeLabels[i] = i;
+    }
+}
+
+void MarginalTree ::IncLabels()
+{
+    for (int i = 0; i < GetNumLeaves(); ++i)
+    {
+        ++listNodeLabels[i];
+    }
+}
+
+void MarginalTree ::FindSibLeafPairs(vector<pair<int, int>> &listSibPairs) const
+{
+    //cout << "FindSibLeafPairs:\n";
+    //Dump();
+    // find leaves that are siblings (return the index (note not label) of the sib pairs)
+    for (int i = GetNumLeaves(); i < GetTotNodesNum(); ++i)
+    {
+        //
+        int nvleft = GetLeftDescendant(i);
+        int nvRight = GetRightDescendant(i);
+        if (IsLeaf(nvleft) == true && IsLeaf(nvRight) == true)
+        {
+            pair<int, int> pp(nvleft, nvRight);
+            listSibPairs.push_back(pp);
+        }
+    }
+    YW_ASSERT_INFO(listSibPairs.size() > 0, "Must have at least one pair");
+}
+
+void MarginalTree ::MakeLeafSubtreeOfTwo(int posLeaf, int lblChild1, int lblChild2, double len1, double len2)
+{
+    //cout << "MakeLeafSubtreeOfTwo: posLeaf: " << posLeaf << ", child1:" << lblChild1 << ", child2:" << lblChild2 << ", len1:" << len1 << ", len2:" << len2 << endl;
+    // add two new leaves below a leaf (here, the two new leaves are located at the end of leaves; and the new internal node (original leaf)
+    // is right next to these new leaves)
+    // also clean up the tree a bit (set labels of internal nodes to be -1)
+    vector<int> listNodeLabelsNew;
+    vector<int> listParentNodePosNew;
+    vector<double> listEdgeDistNew;
+
     //
-    int nvleft = GetLeftDescendant(i);
-    int nvRight = GetRightDescendant(i);
-    if (IsLeaf(nvleft) == true && IsLeaf(nvRight) == true) {
-      pair<int, int> pp(nvleft, nvRight);
-      listSibPairs.push_back(pp);
-    }
-  }
-  YW_ASSERT_INFO(listSibPairs.size() > 0, "Must have at least one pair");
-}
-
-void MarginalTree ::MakeLeafSubtreeOfTwo(int posLeaf, int lblChild1,
-                                         int lblChild2, double len1,
-                                         double len2) {
-  // cout << "MakeLeafSubtreeOfTwo: posLeaf: " << posLeaf << ", child1:" <<
-  // lblChild1 << ", child2:" << lblChild2 << ", len1:" << len1 << ", len2:" <<
-  // len2 << endl;
-  // add two new leaves below a leaf (here, the two new leaves are located at
-  // the end of leaves; and the new internal (original elaf) is right next to
-  // these new leaves) also clean up the tree a bit (set labels of internal
-  // nodes to be -1)
-  vector<int> listNodeLabelsNew;
-  vector<int> listParentNodePosNew;
-  vector<double> listEdgeDistNew;
-
-  //
-  for (int i = 0; i < GetNumLeaves(); ++i) {
-    if (i != posLeaf) {
-      listNodeLabelsNew.push_back(GetLabel(i));
-      listParentNodePosNew.push_back(GetParent(i) + 2);
-      listEdgeDistNew.push_back(GetEdgeLen(i));
-    }
-  }
-  // add the two new leaves
-  listNodeLabelsNew.push_back(lblChild1);
-  listNodeLabelsNew.push_back(lblChild2);
-  int posCur = (int)listNodeLabelsNew.size();
-  listNodeLabelsNew.push_back(-1);
-  listParentNodePosNew.push_back(posCur);
-  listParentNodePosNew.push_back(posCur);
-  listParentNodePosNew.push_back(GetParent(posLeaf) + 2);
-  listEdgeDistNew.push_back(len1);
-  listEdgeDistNew.push_back(len2);
-  listEdgeDistNew.push_back(GetEdgeLen(posLeaf));
-  for (int i = GetNumLeaves(); i < GetTotNodesNum(); ++i) {
+    for (int i = 0; i < GetNumLeaves(); ++i)
+    {
+        if (i != posLeaf)
+        {
+            listNodeLabelsNew.push_back(GetLabel(i));
+            listParentNodePosNew.push_back(GetParent(i) + 2);
+            listEdgeDistNew.push_back(GetEdgeLen(i));
+        }
+    }
+    // add the two new leaves
+    listNodeLabelsNew.push_back(lblChild1);
+    listNodeLabelsNew.push_back(lblChild2);
+    int posCur = (int)listNodeLabelsNew.size();
     listNodeLabelsNew.push_back(-1);
-    if (GetParent(i) >= 0) {
-      listParentNodePosNew.push_back(GetParent(i) + 2);
-    } else {
-      listParentNodePosNew.push_back(-1);
-    }
-    listEdgeDistNew.push_back(GetEdgeLen(i));
-  }
-
-  // now update the info
-  ++this->numLeaves;
-  this->listNodeLabels = listNodeLabelsNew;
-  this->listParentNodePos = listParentNodePosNew;
-  this->listEdgeDist = listEdgeDistNew;
-  listLeftDescs.clear();
-  listRightDescs.clear();
-  BuildDescendantInfo();
-}
-
-void MarginalTree ::GetLabelListForLeaf(vector<int> &listLbls) const {
-  //
-  listLbls.clear();
-  for (int i = 0; i < GetNumLeaves(); ++i) {
-    listLbls.push_back(GetLabel(i));
-  }
-}
-
-void MarginalTree ::FindDiffSubtreesFrom(const MarginalTree &mtreeRef,
-                                         set<int> &setDiffBrs,
-                                         set<int> &setDiffRefMissed) const {
-  // find all branches (subtrees below them) that are not in the reference tree
-  // setDiffBrs: in this tree but not in reference tree
-  // setDiffRefMissed: in reference tree but not in this tree
-  vector<set<int> > listSubtreesRef;
-  mtreeRef.ConsDecedentLeavesInfoLabels(listSubtreesRef);
-  vector<set<int> > listSubtreesThis;
-  ConsDecedentLeavesInfoLabels(listSubtreesThis);
-  set<set<int> > setSubtreesRef;
-  PopulateSetByVecGen(setSubtreesRef, listSubtreesRef);
-  set<set<int> > setSubtreesThis;
-  PopulateSetByVecGen(setSubtreesThis, listSubtreesThis);
-  //
-  setDiffBrs.clear();
-  for (int i = 0; i < (int)listSubtreesThis.size(); ++i) {
-    if (setSubtreesRef.find(listSubtreesThis[i]) == setSubtreesRef.end()) {
-      setDiffBrs.insert(i);
+    listParentNodePosNew.push_back(posCur);
+    listParentNodePosNew.push_back(posCur);
+    listParentNodePosNew.push_back(GetParent(posLeaf) + 2);
+    listEdgeDistNew.push_back(len1);
+    listEdgeDistNew.push_back(len2);
+    listEdgeDistNew.push_back(GetEdgeLen(posLeaf));
+    for (int i = GetNumLeaves(); i < GetTotNodesNum(); ++i)
+    {
+        listNodeLabelsNew.push_back(-1);
+        if (GetParent(i) >= 0)
+        {
+            listParentNodePosNew.push_back(GetParent(i) + 2);
+        }
+        else
+        {
+            listParentNodePosNew.push_back(-1);
+        }
+        listEdgeDistNew.push_back(GetEdgeLen(i));
+    }
+
+    // now update the info
+    ++this->numLeaves;
+    this->listNodeLabels = listNodeLabelsNew;
+    this->listParentNodePos = listParentNodePosNew;
+    this->listEdgeDist = listEdgeDistNew;
+    listLeftDescs.clear();
+    listRightDescs.clear();
+    BuildDescendantInfo();
+}
+
+void MarginalTree ::GetLabelListForLeaf(vector<int> &listLbls) const
+{
+    //
+    listLbls.clear();
+    for (int i = 0; i < GetNumLeaves(); ++i)
+    {
+        listLbls.push_back(GetLabel(i));
+    }
+}
+
+void MarginalTree ::FindDiffSubtreesFrom(const MarginalTree &mtreeRef, set<int> &setDiffBrs, set<int> &setDiffRefMissed) const
+{
+    // find all branches (subtrees below them) that are not in the reference tree
+    // setDiffBrs: in this tree but not in reference tree
+    // setDiffRefMissed: in reference tree but not in this tree
+    vector<set<int>> listSubtreesRef;
+    mtreeRef.ConsDecedentLeavesInfoLabels(listSubtreesRef);
+    vector<set<int>> listSubtreesThis;
+    ConsDecedentLeavesInfoLabels(listSubtreesThis);
+    set<set<int>> setSubtreesRef;
+    PopulateSetByVecGen(setSubtreesRef, listSubtreesRef);
+    set<set<int>> setSubtreesThis;
+    PopulateSetByVecGen(setSubtreesThis, listSubtreesThis);
+    //
+    setDiffBrs.clear();
+    for (int i = 0; i < (int)listSubtreesThis.size(); ++i)
+    {
+        if (setSubtreesRef.find(listSubtreesThis[i]) == setSubtreesRef.end())
+        {
+            setDiffBrs.insert(i);
 
 #if 0
             // alsoinsert any ancestral edge into it
@@ -2560,35 +2800,42 @@ void MarginalTree ::FindDiffSubtreesFrom(const MarginalTree &mtreeRef,
                 }
             }
 #endif
+        }
     }
-  }
-  setDiffRefMissed.clear();
-  for (int i = 0; i < (int)listSubtreesRef.size(); ++i) {
-    if (setSubtreesThis.find(listSubtreesRef[i]) == setSubtreesThis.end()) {
-      setDiffRefMissed.insert(i);
-    }
-  }
-}
-
-bool MarginalTree ::IsOutgroup(int lvid) const {
-  // cout << "IsOutgroup: lvid = " << lvid << ", tree is: ";
-  // Dump();
-  int rtn = GetRoot();
-  // check two children of root
-  int lc = GetLeftDescendant(rtn);
-  if (IsLeaf(lc)) {
-    if (GetLabel(lc) == lvid) {
-      // cout << "good OG\n";
-      return true;
-    }
-  }
-  int rc = GetRightDescendant(rtn);
-  if (IsLeaf(rc)) {
-    if (GetLabel(rc) == lvid) {
-      // cout << "good OG\n";
-      return true;
-    }
-  }
-  // cout << "BAD OG\n";
-  return false;
+    setDiffRefMissed.clear();
+    for (int i = 0; i < (int)listSubtreesRef.size(); ++i)
+    {
+        if (setSubtreesThis.find(listSubtreesRef[i]) == setSubtreesThis.end())
+        {
+            setDiffRefMissed.insert(i);
+        }
+    }
+}
+
+bool MarginalTree ::IsOutgroup(int lvid) const
+{
+    //cout << "IsOutgroup: lvid = " << lvid << ", tree is: ";
+    //Dump();
+    int rtn = GetRoot();
+    // check two children of root
+    int lc = GetLeftDescendant(rtn);
+    if (IsLeaf(lc))
+    {
+        if (GetLabel(lc) == lvid)
+        {
+            //cout << "good OG\n";
+            return true;
+        }
+    }
+    int rc = GetRightDescendant(rtn);
+    if (IsLeaf(rc))
+    {
+        if (GetLabel(rc) == lvid)
+        {
+            //cout << "good OG\n";
+            return true;
+        }
+    }
+    //cout << "BAD OG\n";
+    return false;
 }
diff --git a/trisicell/external/scistree/MarginalTree.h b/trisicell/external/scistree/MarginalTree.h
index 0ac2709..82348c0 100644
--- a/trisicell/external/scistree/MarginalTree.h
+++ b/trisicell/external/scistree/MarginalTree.h
@@ -9,168 +9,139 @@ using namespace std;
 #include "Utils2.h"
 #include "Utils3.h"
 
-//////////////////////////////////////////////////////////////////////////////
-// Define a simple coalescent tree. My experience shows that such a data
-// structure can be quite useful
+///////////////////////////////////////////////////////////////////////////////////////////////
+// Define a simple coalescent tree. My experience shows that such a data structure can
+// be quite useful
 
 // yet another structure to represent marginal tree
 
-class MarginalTree {
+class MarginalTree
+{
 public:
-  MarginalTree();
-  void Clear();
-  void Binarize();
-  void Consolidate();
-  void BuildDescendantInfo();
-  void InitDefaultEdgeLen();
-  void InitUnitEdgelen();
-  double GetDefaultEdgeLen(int child);
-  void SetParent(int child, int par, bool fAdjLen = true);
-  int GetParent(int child) const;
-  int GetLeftDescendant(int node) const;
-  int GetRightDescendant(int node) const;
-  double GetEdgeLen(int childNodeIndex) const;
-  double GetTotEdgeLen() const;
-  int GetTotNodesNum() const { return listNodeLabels.size(); }
-  int GetNumLeaves() const { return numLeaves; }
-  void SetNumLeaves(int nl) { numLeaves = nl; }
-  void ConsDecedentInfo(vector<vector<int> > &descNodes) const;
-  void ConsAllDecedentInfo(vector<set<int> > &descNodes,
-                           bool fIncSelf = true) const;
-  void ConsDecedentLeavesInfo(vector<set<int> > &descNodes) const;
-  void ConsDecedentLeavesInfoLabels(vector<set<int> > &leafNodeLabels) const;
-  void ConsHeightsInfo(vector<int> &nodesHt) const;
-  void Dump() const;
-  int GetLabel(int r) const {
-    YW_ASSERT_INFO(r >= 0 && r < (int)listNodeLabels.size(), "wrong3");
-    return listNodeLabels[r];
-  }
-  void SetLabel(int node, int lbl) {
-    YW_ASSERT_INFO(node >= 0 && node < (int)listNodeLabels.size(), "wrong4");
-    listNodeLabels[node] = lbl;
-  }
-  int GetPosForLabel(int lbl) const;
-  void GetlabelsFor(const set<int> &setPos, set<int> &setLbls) const;
-  bool IsLeaf(int node) const { return node >= 0 && node < numLeaves; }
-  bool IsToplogicSame(const MarginalTree &tree) const;
-  int GetMRCA(int v1, int v2) const;
-  int GetFirstNonselfAnces(int v, const set<int> &setAnces) const;
-  void GetChildren(int node, set<int> &listChildren) const;
-  int GetMaxHt() const;
-  void RemoveLeafNodeFromBinaryTree(int lfn);
-  bool AreTwoPathsDisjoint(int sn1, int en1, int sn2, int en2) const;
-  int GetPath(int sn, int en, set<int> &edgesOnPath) const;
-  double GetPathLen(int sn, int en);
-  void OutputGML(const char *fileName) const;
-  string GetNewick() const;
-  string GetNewickSorted(bool fLen) const;
-  string GetNewickNoBrLen() const;
-  string GetNewickNoBrLen2() const;
-  void GetLeavesUnder(int nn, set<int> &leavesUnder) const;
-  void GetLeafSetsForCuts(const vector<int> &listCuts,
-                          vector<set<int> > &listLeafSets) const;
-  int GetMRCAForNodes(const set<int> &listNodes) const;
-  bool IsNodeUnder(int nn, int ancesNode) const;
-  void RandPermuateLeaves();
-  int GetTriple(int a, int b, int c) const;
-  int GetSibling(int a) const;
-  bool AreNodesSibling(int a, int b) const;
-  void SetBranchLen(int b, double len) {
-    YW_ASSERT_INFO(b < (int)listEdgeDist.size(), "Branch wrong");
-    listEdgeDist[b] = len;
-  }
-  void SetLabelList(const vector<int> &listLbls) { listNodeLabels = listLbls; }
-  void GetLabelList(vector<int> &listLbls) const { listLbls = listNodeLabels; }
-  void GetLabelListForLeaf(vector<int> &listLbls) const;
-  void SetParList(const vector<int> &listPars) { listParentNodePos = listPars; }
-  void SetBranchLenList(const vector<double> &listLens) {
-    listEdgeDist = listLens;
-  }
-  void SortByLeafId();
-  void FixDupIds();
-  double GetHeight() const;
-  int GetRoot() const { return GetTotNodesNum() - 1; }
-  void SwapBranches(int nodeBranch1, int nodeBranch2);
-  void RearrangeParIncOrder();
-  void ResetIncLabel();
-  void IncLabels();
-  void GetTreeEdgeLen(vector<double> &listEdgeDistOut) const {
-    listEdgeDistOut = this->listEdgeDist;
-  }
-  void MapLeafLblConsecutiveOrder(vector<int> &listLeafLblsOld);
-  void RemapLeafLabels(const map<int, int> &mapLeafLblsToNew);
-  void FindAllSplits(vector<set<int> > &listSplits) const;
-  void FindSibLeafPairs(vector<pair<int, int> > &listSibPairs) const;
-  void MakeLeafSubtreeOfTwo(int posLeaf, int lblChild1, int lblChild2,
-                            double len1, double len2);
-  void FindDiffSubtreesFrom(const MarginalTree &mtreeRef, set<int> &setDiffBrs,
-                            set<int> &setDiffBrsOrigOnly) const;
-  bool IsOutgroup(int lvid) const;
+    MarginalTree();
+    void Clear();
+    void Binarize();
+    void Consolidate();
+    void BuildDescendantInfo();
+    void InitDefaultEdgeLen();
+    void InitUnitEdgelen();
+    double GetDefaultEdgeLen(int child);
+    void SetParent(int child, int par, bool fAdjLen = true);
+    int GetParent(int child) const;
+    int GetLeftDescendant(int node) const;
+    int GetRightDescendant(int node) const;
+    double GetEdgeLen(int childNodeIndex) const;
+    double GetTotEdgeLen() const;
+    int GetTotNodesNum() const { return listNodeLabels.size(); }
+    int GetNumLeaves() const { return numLeaves; }
+    void SetNumLeaves(int nl) { numLeaves = nl; }
+    void ConsDecedentInfo(vector<vector<int>> &descNodes) const;
+    void ConsAllDecedentInfo(vector<set<int>> &descNodes, bool fIncSelf = true) const;
+    void ConsDecedentLeavesInfo(vector<set<int>> &descNodes) const;
+    void ConsDecedentLeavesInfoLabels(vector<set<int>> &leafNodeLabels) const;
+    void ConsHeightsInfo(vector<int> &nodesHt) const;
+    void Dump() const;
+    int GetLabel(int r) const
+    {
+        YW_ASSERT_INFO(r >= 0 && r < (int)listNodeLabels.size(), "wrong3");
+        return listNodeLabels[r];
+    }
+    void SetLabel(int node, int lbl)
+    {
+        YW_ASSERT_INFO(node >= 0 && node < (int)listNodeLabels.size(), "wrong4");
+        listNodeLabels[node] = lbl;
+    }
+    int GetPosForLabel(int lbl) const;
+    void GetlabelsFor(const set<int> &setPos, set<int> &setLbls) const;
+    bool IsLeaf(int node) const { return node >= 0 && node < numLeaves; }
+    bool IsToplogicSame(const MarginalTree &tree) const;
+    int GetMRCA(int v1, int v2) const;
+    int GetFirstNonselfAnces(int v, const set<int> &setAnces) const;
+    void GetChildren(int node, set<int> &listChildren) const;
+    int GetMaxHt() const;
+    void RemoveLeafNodeFromBinaryTree(int lfn);
+    bool AreTwoPathsDisjoint(int sn1, int en1, int sn2, int en2) const;
+    int GetPath(int sn, int en, set<int> &edgesOnPath) const;
+    double GetPathLen(int sn, int en);
+    void OutputGML(const char *fileName) const;
+    string GetNewick() const;
+    string GetNewickSorted(bool fLen) const;
+    string GetNewickNoBrLen() const;
+    string GetNewickNoBrLen2() const;
+    void GetLeavesUnder(int nn, set<int> &leavesUnder) const;
+    void GetLeafSetsForCuts(const vector<int> &listCuts, vector<set<int>> &listLeafSets) const;
+    int GetMRCAForNodes(const set<int> &listNodes) const;
+    bool IsNodeUnder(int nn, int ancesNode) const;
+    void RandPermuateLeaves();
+    int GetTriple(int a, int b, int c) const;
+    int GetSibling(int a) const;
+    bool AreNodesSibling(int a, int b) const;
+    void SetBranchLen(int b, double len)
+    {
+        YW_ASSERT_INFO(b < (int)listEdgeDist.size(), "Branch wrong");
+        listEdgeDist[b] = len;
+    }
+    void SetLabelList(const vector<int> &listLbls) { listNodeLabels = listLbls; }
+    void GetLabelList(vector<int> &listLbls) const { listLbls = listNodeLabels; }
+    void GetLabelListForLeaf(vector<int> &listLbls) const;
+    void SetParList(const vector<int> &listPars) { listParentNodePos = listPars; }
+    void SetBranchLenList(const vector<double> &listLens) { listEdgeDist = listLens; }
+    void SortByLeafId();
+    void FixDupIds();
+    double GetHeight() const;
+    int GetRoot() const { return GetTotNodesNum() - 1; }
+    void SwapBranches(int nodeBranch1, int nodeBranch2);
+    void RearrangeParIncOrder();
+    void ResetIncLabel();
+    void IncLabels();
+    void GetTreeEdgeLen(vector<double> &listEdgeDistOut) const { listEdgeDistOut = this->listEdgeDist; }
+    void MapLeafLblConsecutiveOrder(vector<int> &listLeafLblsOld);
+    void RemapLeafLabels(const map<int, int> &mapLeafLblsToNew);
+    void FindAllSplits(vector<set<int>> &listSplits) const;
+    void FindSibLeafPairs(vector<pair<int, int>> &listSibPairs) const;
+    void MakeLeafSubtreeOfTwo(int posLeaf, int lblChild1, int lblChild2, double len1, double len2);
+    void FindDiffSubtreesFrom(const MarginalTree &mtreeRef, set<int> &setDiffBrs, set<int> &setDiffBrsOrigOnly) const;
+    bool IsOutgroup(int lvid) const;
 
 public:
-  int CalcNormHeight(int node);
-  void GetParPosInfo(vector<int> &parPosList) {
-    parPosList = listParentNodePos;
-  }
-  double GetHeightOfNode(int node) const;
+    int CalcNormHeight(int node);
+    void GetParPosInfo(vector<int> &parPosList) { parPosList = listParentNodePos; }
+    double GetHeightOfNode(int node) const;
 
-  // Use an array to store  leaves
-  int numLeaves;
-  // assume the first numLeaves nodes are leaves
-  vector<int> listNodeLabels;
-  vector<int> listParentNodePos;
-  vector<double> listEdgeDist;
-  vector<int> listLeftDescs;
-  vector<int> listRightDescs;
+    // Use an array to store  leaves
+    int numLeaves;
+    // assume the first numLeaves nodes are leaves
+    vector<int> listNodeLabels;
+    vector<int> listParentNodePos;
+    vector<double> listEdgeDist;
+    vector<int> listLeftDescs;
+    vector<int> listRightDescs;
 
 private:
-  string GetNewickAt(int node, bool fSort = false, bool fLen = true) const;
-  void MapLeafLblConsecutiveOrderAt(int rootST, int &idNext,
-                                    vector<int> &listLeafLblsOld);
+    string GetNewickAt(int node, bool fSort = false, bool fLen = true) const;
+    void MapLeafLblConsecutiveOrderAt(int rootST, int &idNext, vector<int> &listLeafLblsOld);
 };
 
-////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////
 // Global Utilities
 class TaxaMapper;
 
 bool ReadinMarginalTrees(ifstream &inFile, vector<MarginalTree> &treeList);
-bool ReadinMarginalTreesNewick(ifstream &inFile, int numLeaves,
-                               vector<MarginalTree> &treeList,
-                               TaxaMapper *pTMapper = NULL, bool fDup = false);
-bool ReadinMarginalTreesNewickWLen(ifstream &inFile, int numLeaves,
-                                   vector<MarginalTree> &treeList,
-                                   TaxaMapper *pTMapper = NULL);
+bool ReadinMarginalTreesNewick(ifstream &inFile, int numLeaves, vector<MarginalTree> &treeList, TaxaMapper *pTMapper = NULL, bool fDup = false);
+bool ReadinMarginalTreesNewickWLen(ifstream &inFile, int numLeaves, vector<MarginalTree> &treeList, TaxaMapper *pTMapper = NULL);
 void AddRootAsLeafToTree(MarginalTree &tree1, bool fIdNonNeg = false);
 void GenRandBinaryTree(int numLeaves, MarginalTree &tree1);
 void GenRandBinaryTreeClock(int numLeaves, double totHt, MarginalTree &tree1);
-// vector<int>: list of leaves in the order from top down, int = top node of
-// chain
-void FindChainsInTree(const MarginalTree &tree1,
-                      map<vector<int>, int> &foundChains);
-void InitMarginalTree(MarginalTree &mTree, int numLeaves,
-                      const vector<int> &listLabels,
-                      const vector<int> &listParentNodePos);
-bool ReadinMarginalTreesNewickWLenString(const string &strNewick, int numLeaves,
-                                         MarginalTree &treeOut,
-                                         bool fStartFromZero = true,
-                                         TaxaMapper *pTMapper = NULL);
-void CollapseEquivTrees(const vector<MarginalTree> &listOrigTrees,
-                        vector<MarginalTree> &listUniqTrees,
-                        vector<int> &listMultiplicity);
-void FindOneNNIMTreesFrom(
-    MarginalTree &mTreeSrc, vector<MarginalTree> &listNNITrees,
-    vector<pair<int, int> > *pListPairEdgesSwapped = NULL);
-void CreateSubtreeFromLeaves(MarginalTree &mTreeOrig,
-                             const set<int> &setLeafLabels,
-                             MarginalTree &mTreeSub,
-                             map<int, int> &mapNewNodeToOldNode);
-void UpdateBranchLenInSubtree(MarginalTree &mTreeOrig,
-                              map<int, int> &mapNewNodeToOldNode,
-                              MarginalTree &mTreeSub);
-void RemapLeafIntLabelsTaxaMap(MarginalTree &mtree,
-                               map<string, string> &mapper);
+// vector<int>: list of leaves in the order from top down, int = top node of chain
+void FindChainsInTree(const MarginalTree &tree1, map<vector<int>, int> &foundChains);
+void InitMarginalTree(MarginalTree &mTree, int numLeaves, const vector<int> &listLabels, const vector<int> &listParentNodePos);
+bool ReadinMarginalTreesNewickWLenString(const string &strNewick, int numLeaves, MarginalTree &treeOut, bool fStartFromZero = true, TaxaMapper *pTMapper = NULL);
+void CollapseEquivTrees(const vector<MarginalTree> &listOrigTrees, vector<MarginalTree> &listUniqTrees, vector<int> &listMultiplicity);
+void FindOneNNIMTreesFrom(MarginalTree &mTreeSrc, vector<MarginalTree> &listNNITrees, vector<pair<int, int>> *pListPairEdgesSwapped = NULL);
+void CreateSubtreeFromLeaves(MarginalTree &mTreeOrig, const set<int> &setLeafLabels, MarginalTree &mTreeSub, map<int, int> &mapNewNodeToOldNode);
+void UpdateBranchLenInSubtree(MarginalTree &mTreeOrig, map<int, int> &mapNewNodeToOldNode, MarginalTree &mTreeSub);
+void RemapLeafIntLabelsTaxaMap(MarginalTree &mtree, map<string, string> &mapper);
 void RemapMargTree(MarginalTree &mtree, TaxaMapper &refTMapper);
-void FindMatchedSubtrees(MarginalTree &mtreeNew, MarginalTree &mtreeRef,
-                         map<int, int> &mapSTNewToRef);
+void FindMatchedSubtrees(MarginalTree &mtreeNew, MarginalTree &mtreeRef, map<int, int> &mapSTNewToRef);
 
 #endif // MARGINAL_TREE_H
diff --git a/trisicell/external/scistree/PhylogenyTree.cpp b/trisicell/external/scistree/PhylogenyTree.cpp
index 9f37546..83f2e5a 100644
--- a/trisicell/external/scistree/PhylogenyTree.cpp
+++ b/trisicell/external/scistree/PhylogenyTree.cpp
@@ -1,7 +1,7 @@
-#include "PhylogenyTree.h"
+#include <stack>
 #include <fstream>
 #include <iostream>
-#include <stack>
+#include "PhylogenyTree.h"
 
 // ***************************************************************************
 // The following code is largely based on Gusfield's 1991 Paper
@@ -12,526 +12,566 @@ extern void OutputQuotedString(ofstream &outFile, const char *buf);
 // Utilites functions
 // ***************************************************************************
 
-int PhylogenyTree ::GetIntLabelFromParenthStr(const string &strLabelWParenth) {
-  //
-  YW_ASSERT_INFO(strLabelWParenth[0] == '(' &&
-                     strLabelWParenth[strLabelWParenth.length() - 1] == ')',
-                 "String does not come with ()");
-  string strPrune = strLabelWParenth.substr(1, strLabelWParenth.length() - 2);
-  int res = -1;
-  sscanf(strPrune.c_str(), "%d", &res);
-  return res;
+int PhylogenyTree ::GetIntLabelFromParenthStr(const string &strLabelWParenth)
+{
+    // Drop the enclosing '(' and ')' and parse what is left as a decimal int; res stays -1 if sscanf matches nothing.
+    YW_ASSERT_INFO(strLabelWParenth[0] == '(' && strLabelWParenth[strLabelWParenth.length() - 1] == ')', "String does not come with ()");
+    string strPrune = strLabelWParenth.substr(1, strLabelWParenth.length() - 2);
+    int res = -1;
+    sscanf(strPrune.c_str(), "%d", &res);
+    return res;
 }
 
-void PhylogenyTree ::GetARoot(const BinaryMatrix &mat, vector<int> &root) {
-  if (knownRoot.size() > 0) {
-    root = knownRoot;
-    return;
-  }
-
-  // We take the majority sequence as root. Refer to the paper for details
-  root.clear();
-  for (int c = 0; c < mat.GetColNum(); ++c) {
-    int rc = 0;
-    int numOne = 0;
-    for (int r = 0; r < mat.GetRowNum(); ++r) {
-      if (mat(r, c) == 1) {
-        numOne++;
-      }
+void PhylogenyTree ::GetARoot(const BinaryMatrix &mat, vector<int> &root)
+{
+    if (knownRoot.size() > 0)
+    {
+        root = knownRoot;
+        return;
     }
-    // 12/08/07: fixed. Must consider the case say 6 0 and 5 1,
-    // has to plus one to ensure correctness
-    if (numOne >= (mat.GetRowNum() + 1) / 2) {
-      rc = 1;
+
+    // We take the majority sequence as root. Refer to the paper for details
+    root.clear();
+    for (int c = 0; c < mat.GetColNum(); ++c)
+    {
+        int rc = 0;
+        int numOne = 0;
+        for (int r = 0; r < mat.GetRowNum(); ++r)
+        {
+            if (mat(r, c) == 1)
+            {
+                numOne++;
+            }
+        }
+        // 12/08/07: fixed. Must consider the case say 6 0 and 5 1,
+        // has to plus one to ensure correctness
+        if (numOne >= (mat.GetRowNum() + 1) / 2)
+        {
+            rc = 1;
+        }
+        root.push_back(rc);
     }
-    root.push_back(rc);
-  }
-  //    cout << "Root = ";
-  //    DumpIntVec ( root );
+    //    cout << "Root = ";
+    //    DumpIntVec ( root );
 }
 
-void PhylogenyTree ::RadixSortByCol(const BinaryMatrix &mat,
-                                    const vector<int> &root,
-                                    vector<int> &sortList) {
-  // cout << "root = ";
-  // DumpIntVec( root );
-  // This is the step 1 of Gusfield tree building algorithm
-  // We treat each column as a number, encoded by the binary vecgtor stored in
-  // the column row 1 contains the MSB of the number The result is stored in a
-  // sorted list, with LARGEST number comes in first For details of radix sort,
-  // refer CLR
-  sortList.clear();
-  for (int i = 0; i < mat.GetColNum(); ++i) {
-    sortList.push_back(i);
-  }
-
-  // Now sort from LSB of the number, i.e. last row first
-  for (int i = mat.GetRowNum() - 1; i >= 0; --i) {
-    SortByOneBit(i, mat, root, sortList);
-  }
+void PhylogenyTree ::RadixSortByCol(const BinaryMatrix &mat, const vector<int> &root, vector<int> &sortList)
+{
+    //cout << "root = ";
+    //DumpIntVec( root );
+    // This is the step 1 of Gusfield tree building algorithm
+    // We treat each column as a number, encoded by the binary vector stored in the column
+    // row 1 contains the MSB of the number
+    // The result is stored in a sorted list, with LARGEST number comes in first
+    // For details of radix sort, refer CLR
+    sortList.clear();
+    for (int i = 0; i < mat.GetColNum(); ++i)
+    {
+        sortList.push_back(i);
+    }
+
+    // Now sort from LSB of the number, i.e. last row first
+    for (int i = mat.GetRowNum() - 1; i >= 0; --i)
+    {
+        SortByOneBit(i, mat, root, sortList);
+    }
 }
 
-void PhylogenyTree ::SortByOneBit(int bitPosRow, const BinaryMatrix &mat,
-                                  const vector<int> &root,
-                                  vector<int> &sortList) {
-  // cout << "bitPosRow = " << bitPosRow << endl;
-  // cout << "root here = ";
-  // DumpIntVec( root );
-  // cout << "entry sortList = ";
-  // DumpIntVec( sortList );
-  // Sort the list by one bit (the ith row)
-  // Initailize a pre-list, holding the last sorted list. Simply initailize to
-  // original order
-  vector<int> preList = sortList;
-  sortList.clear();
-
-  // We do two path, first to find 1 cells in that row and next one cell (since
-  // we want the LARGEST first) This is in fact counting sort, with k (the
-  // limit) == 1
-  for (int i = 0; i < preList.size(); ++i) {
-    // Note that we 1 = NON-ROOT-VALUE
-    // cout << "mat(bitPosRow, preList[i] ) = " << mat( bitPosRow, preList[i]  )
-    // << endl;
-    if (mat(bitPosRow, preList[i]) != root[preList[i]]) {
-      sortList.push_back(preList[i]);
+void PhylogenyTree ::SortByOneBit(int bitPosRow, const BinaryMatrix &mat, const vector<int> &root,
+                                  vector<int> &sortList)
+{
+    //cout << "bitPosRow = " << bitPosRow << endl;
+    //cout << "root here = ";
+    //DumpIntVec( root );
+    //cout << "entry sortList = ";
+    //DumpIntVec( sortList );
+    // Sort the list by one bit (the ith row)
+    // Initialize a pre-list, holding the last sorted list. Simply initialize to original order
+    vector<int> preList = sortList;
+    sortList.clear();
+
+    // We do two passes: first collect the 1 (non-root) cells in that row, then the 0 (root-valued) cells (since we want the LARGEST first)
+    // This is in fact counting sort, with k (the limit) == 1
+    for (int i = 0; i < preList.size(); ++i)
+    {
+        // Note that 1 = NON-ROOT-VALUE
+        // cout << "mat(bitPosRow, preList[i] ) = " << mat( bitPosRow, preList[i]  ) << endl;
+        if (mat(bitPosRow, preList[i]) != root[preList[i]])
+        {
+            sortList.push_back(preList[i]);
+        }
     }
-  }
-  // cout << "parital sortList = ";
-  // DumpIntVec( sortList );
+    //cout << "parital sortList = ";
+    //DumpIntVec( sortList );
 
-  for (int i = 0; i < preList.size(); ++i) {
-    if (mat(bitPosRow, preList[i]) == root[preList[i]]) {
-      sortList.push_back(preList[i]);
+    for (int i = 0; i < preList.size(); ++i)
+    {
+        if (mat(bitPosRow, preList[i]) == root[preList[i]])
+        {
+            sortList.push_back(preList[i]);
+        }
     }
-  }
-  // cout << "exit sortList = ";
-  // DumpIntVec( sortList );
+    //cout << "exit sortList = ";
+    //DumpIntVec( sortList );
 }
 
-void PhylogenyTree ::RemoveDupSites(const BinaryMatrix &mat,
-                                    vector<int> &sortedPosList,
-                                    vector<vector<int> > &duplicates) {
-  // This function takes the sorted list, and then remove the duplicate sites
-  // by comparing one site to its left row, if duplicate, do not put into new
-  // list
-  vector<int> noDupList;
-  if (sortedPosList.size() > 0) {
-    noDupList.push_back(sortedPosList[0]);
-  }
-  vector<int> dupList; // store which sites are duplicates to this one
-  for (int i = 1; i < sortedPosList.size(); ++i) {
-    bool match = true;
-    // Check to see if this column is the same as its immediate left one
-    for (int r = 0; r < mat.GetRowNum(); ++r) {
-      if (mat(r, sortedPosList[i]) != mat(r, sortedPosList[i - 1])) {
-        match = false;
-        break;
-      }
+void PhylogenyTree ::RemoveDupSites(const BinaryMatrix &mat, vector<int> &sortedPosList,
+                                    vector<vector<int>> &duplicates)
+{
+    // This function takes the sorted list, and then remove the duplicate sites
+    // by comparing each site to the one immediately to its left; if duplicate, do not put into new list
+    vector<int> noDupList;
+    if (sortedPosList.size() > 0)
+    {
+        noDupList.push_back(sortedPosList[0]);
     }
-    if (match == false) {
-      noDupList.push_back(sortedPosList[i]);
-
-      // Now we maintian the duplicate list
-      // cout << "for site " << noDupList[noDupList.size() - 2] << ", duplicate
-      // sites are: "; DumpIntVec( dupList );
-
-      duplicates.push_back(dupList);
-      dupList.clear();
-    } else {
-      // This site is the same as its immediate left one
-      dupList.push_back(sortedPosList[i]);
+    vector<int> dupList; // store which sites are duplicates to this one
+    for (int i = 1; i < sortedPosList.size(); ++i)
+    {
+        bool match = true;
+        // Check to see if this column is the same as its immediate left one
+        for (int r = 0; r < mat.GetRowNum(); ++r)
+        {
+            if (mat(r, sortedPosList[i]) != mat(r, sortedPosList[i - 1]))
+            {
+                match = false;
+                break;
+            }
+        }
+        if (match == false)
+        {
+            noDupList.push_back(sortedPosList[i]);
+
+            // Now we maintain the duplicate list
+            //cout << "for site " << noDupList[noDupList.size() - 2] << ", duplicate sites are: ";
+            //DumpIntVec( dupList );
+
+            duplicates.push_back(dupList);
+            dupList.clear();
+        }
+        else
+        {
+            // This site is the same as its immediate left one
+            dupList.push_back(sortedPosList[i]);
+        }
     }
-  }
 
-  // Finally, add the final list to it
-  duplicates.push_back(dupList);
-  // cout << "for site " << noDupList[noDupList.size() - 1] << ", duplicate
-  // sites are: "; DumpIntVec( dupList );
-  dupList.clear();
+    // Finally, add the final list to it
+    duplicates.push_back(dupList);
+    //cout << "for site " << noDupList[noDupList.size() - 1] << ", duplicate sites are: ";
+    //DumpIntVec( dupList );
+    dupList.clear();
 
-  // Now set the noDupList to result
-  sortedPosList.clear();
-  sortedPosList = noDupList;
+    // Now set the noDupList to result
+    sortedPosList.clear();
+    sortedPosList = noDupList;
 }
 
-void PhylogenyTree ::ComputeLijLj(const BinaryMatrix &mat,
-                                  const vector<int> &root,
-                                  const vector<int> &sortedPosList,
-                                  vector<int *> &Lij, vector<int> &Lj) {
-  //    cout << "sortedPosList = ";
-  //    DumpIntVec( sortedPosList );
-
-  // Build Lij and Lj according to the algorithm
-  // CAUTION: you have to keep in mind that Lij, Lj are all based on M', not M
-  // so do a conversion before use
-  for (int i = 0; i < mat.GetRowNum(); ++i) {
-    int last1Pos = -1;
-    for (int j = 0; j < sortedPosList.size(); ++j) {
-      if (mat(i, sortedPosList[j]) != root[sortedPosList[j]]) {
-        // We find a one here, good
-        Lij[i][j] = last1Pos;
-
-        // cout << "at (" << i << ", " << j << "), Lij = " << last1Pos << endl;
-
-        // Remember it
-        last1Pos = j;
-      }
+void PhylogenyTree ::ComputeLijLj(const BinaryMatrix &mat, const vector<int> &root, const vector<int> &sortedPosList,
+                                  vector<int *> &Lij, vector<int> &Lj)
+{
+    //    cout << "sortedPosList = ";
+    //    DumpIntVec( sortedPosList );
+
+    // Build Lij and Lj according to the algorithm
+    // CAUTION: you have to keep in mind that Lij, Lj are all based on M', not M
+    // so do a conversion before use
+    for (int i = 0; i < mat.GetRowNum(); ++i)
+    {
+        int last1Pos = -1;
+        for (int j = 0; j < sortedPosList.size(); ++j)
+        {
+            if (mat(i, sortedPosList[j]) != root[sortedPosList[j]])
+            {
+                // We find a one here, good
+                Lij[i][j] = last1Pos;
+
+                //cout << "at (" << i << ", " << j << "), Lij = " << last1Pos << endl;
+
+                // Remember it
+                last1Pos = j;
+            }
+        }
     }
-  }
-
-  // Now we computes the Lj vector
-  Lj.clear();
-  for (int j = 0; j < sortedPosList.size(); ++j) {
-    int max = -1;
-    for (int r = 0; r < mat.GetRowNum(); ++r) {
-      if (mat(r, sortedPosList[j]) != root[sortedPosList[j]] &&
-          Lij[r][j] > max) {
-        max = Lij[r][j];
-      }
+
+    // Now we compute the Lj vector
+    Lj.clear();
+    for (int j = 0; j < sortedPosList.size(); ++j)
+    {
+        int max = -1;
+        for (int r = 0; r < mat.GetRowNum(); ++r)
+        {
+            if (mat(r, sortedPosList[j]) != root[sortedPosList[j]] && Lij[r][j] > max)
+            {
+                max = Lij[r][j];
+            }
+        }
+        // Now set Lj
+        Lj.push_back(max);
+        //cout << "At j = " << j << ", Lj = " << max << endl;
     }
-    // Now set Lj
-    Lj.push_back(max);
-    // cout << "At j = " << j << ", Lj = " << max << endl;
-  }
 }
 
-bool PhylogenyTree ::ExamineLijLj(const BinaryMatrix &mat,
-                                  const vector<int> &root,
-                                  const vector<int> &sortedPosList,
-                                  const vector<int *> &Lij,
-                                  const vector<int> &Lj) {
-  // cout << "Examine here...\n";
-  for (int i = 0; i < mat.GetRowNum(); ++i) {
-    for (int j = 0; j < sortedPosList.size(); ++j) {
-      if (mat(i, sortedPosList[j]) != root[sortedPosList[j]] &&
-          Lj[j] != Lij[i][j]) {
-        // cout << "At (" << i << ", " << j << "), Lij = " << Lij[i][j] << ",
-        // but Lj = " << Lj[j] << endl;
-        return false;
-      }
+bool PhylogenyTree ::ExamineLijLj(const BinaryMatrix &mat, const vector<int> &root, const vector<int> &sortedPosList,
+                                  const vector<int *> &Lij, const vector<int> &Lj)
+{
+    //cout << "Examine here...\n";
+    for (int i = 0; i < mat.GetRowNum(); ++i)
+    {
+        for (int j = 0; j < sortedPosList.size(); ++j)
+        {
+            if (mat(i, sortedPosList[j]) != root[sortedPosList[j]] && Lj[j] != Lij[i][j])
+            {
+                //cout << "At (" << i << ", " << j << "), Lij = " << Lij[i][j] << ", but Lj = " << Lj[j] << endl;
+                return false;
+            }
+        }
     }
-  }
-  // cout << "done here.\n";
-  return true; // yes, there is a tree
+    //cout << "done here.\n";
+    return true; // yes, there is a tree
 }
 
-void PhylogenyTree ::BuildTree(const BinaryMatrix &mat, const vector<int> &root,
-                               const vector<int> &sortedPosList,
-                               const vector<vector<int> > &duplicates,
-                               const vector<int> &Lj) {
-  // This function creates the tree by creating and linking tree nodes
-  // Make sure the tree is empty
-  if (rootNode != NULL) {
-    delete rootNode;
-    rootNode = NULL;
-  }
-
-  // root is labeled as -1, since all other (column) nodes are labeled by a site
-  rootNode = new TreeNode(-1);
-
-  // Create a node for each site
-  vector<TreeNode *> colNodes;
-  for (int i = 0; i < sortedPosList.size(); ++i) {
-    TreeNode *pNode =
-        new TreeNode(sortedPosList[i]); // for now, use original labels to do it
-    colNodes.push_back(pNode);
-  }
-
-  // Link each node Nj (where L(j) >= 0) to that L(j) node
-  for (int j = 0; j < Lj.size(); ++j) {
-    // Figure out the labels
-    vector<int> labels;
-    labels.push_back(sortedPosList[j]);
-    // Add those in the duplicates
-    for (int dup = 0; dup < duplicates[j].size(); ++dup) {
-      labels.push_back(duplicates[j][dup]);
+void PhylogenyTree ::BuildTree(const BinaryMatrix &mat, const vector<int> &root, const vector<int> &sortedPosList,
+                               const vector<vector<int>> &duplicates, const vector<int> &Lj)
+{
+    // This function creates the tree by creating and linking tree nodes
+    // Make sure the tree is empty
+    if (rootNode != NULL)
+    {
+        delete rootNode;
+        rootNode = NULL;
     }
-    if (Lj[j] >= 0) {
-      // Link it
-      TreeNode *nodeLj = colNodes[Lj[j]];
-
-      // Add it
-      nodeLj->AddChild(colNodes[j], labels);
-      // cout << "Add col node " << sortedPosList[j]  << " under node " <<
-      // sortedPosList[ Lj[j] ] << ".\n";
-    } else {
-      // For this node, we link it from the root
-      rootNode->AddChild(colNodes[j], labels);
-      // cout << "Add col node " << sortedPosList[j]  << " under root.\n";
+
+    // root is labeled as -1, since all other (column) nodes are labeled by a site
+    rootNode = new TreeNode(-1);
+
+    // Create a node for each site
+    vector<TreeNode *> colNodes;
+    for (int i = 0; i < sortedPosList.size(); ++i)
+    {
+        TreeNode *pNode = new TreeNode(sortedPosList[i]); // for now, use original labels to do it
+        colNodes.push_back(pNode);
     }
-  }
-
-  // Now add rows into this tree
-  for (int i = 0; i < mat.GetRowNum(); ++i) {
-    int ci = -1;
-    // Find ci that is the largest cell has one in row i
-    for (int j = sortedPosList.size() - 1; j >= 0; j--) {
-      if (mat(i, sortedPosList[j]) != root[sortedPosList[j]]) {
-        ci = j;
-        break;
-      }
+
+    // Link each node Nj (where L(j) >= 0) to that L(j) node
+    for (int j = 0; j < Lj.size(); ++j)
+    {
+        // Figure out the labels
+        vector<int> labels;
+        labels.push_back(sortedPosList[j]);
+        // Add those in the duplicates
+        for (int dup = 0; dup < duplicates[j].size(); ++dup)
+        {
+            labels.push_back(duplicates[j][dup]);
+        }
+        if (Lj[j] >= 0)
+        {
+            // Link it
+            TreeNode *nodeLj = colNodes[Lj[j]];
+
+            // Add it
+            nodeLj->AddChild(colNodes[j], labels);
+            //cout << "Add col node " << sortedPosList[j]  << " under node " << sortedPosList[ Lj[j] ] << ".\n";
+        }
+        else
+        {
+            // For this node, we link it from the root
+            rootNode->AddChild(colNodes[j], labels);
+            //cout << "Add col node " << sortedPosList[j]  << " under root.\n";
+        }
     }
-    if (ci < 0) {
-      //    cout << "trouble here.\n";
-      //    YW_ASSERT(false);
-      // This is the same as the root sequence
-      TreeNode *pLeaf =
-          new TreeNode(mat.GetColNum() + i); // Use id=row index + colNum
-      pLeaf->AddNodeValue(i);
-      // also set its label
-      char buf[100], buf1[100];
-      sprintf(buf, "(%d)", i);
-      sprintf(buf1, "%d", i);
-      pLeaf->SetLabel(buf);
-      pLeaf->SetUserLabel(buf1);
-
-      vector<int> emptyLabel;
-      rootNode->AddChild(pLeaf, emptyLabel);
-      // cout << "Add row " << i << " under root node.\n";
-
-    } else {
-      // Here we always add a node as children. CAUTION: here we may create
-      // degree-2 nodes, we need to cleanup after this 06/05/05: actually I
-      // decided to go another way: put to leaf first, then splits the multiple
-      // labels into different leaves if needed
-      TreeNode *pn = colNodes[ci];
-      if (pn->IsLeaf() == true) {
-        // also set its label
-        char buf[100], buf1[100];
-        sprintf(buf, "(%d)", i);
-        sprintf(buf1, "%d", i);
-        pn->SetLabel(buf);
-        pn->SetUserLabel(buf1);
-
-        // Now attach this row to the existing leaf, HOW?
-        pn->AddNodeValue(i);
-        // cout << "Add row " << i << " to a leaf (col node) " <<
-        // sortedPosList[ci]  << ".\n";
-
-      } else {
-        TreeNode *pLeaf =
-            new TreeNode(mat.GetColNum() + i); // Use id=row index + colNum
-        pLeaf->AddNodeValue(i);
-        // also set its label
-        char buf[100], buf1[100];
-        sprintf(buf, "(%d)", i);
-        sprintf(buf1, "%d", i);
-        pLeaf->SetLabel(buf);
-        pLeaf->SetUserLabel(buf1);
-
-        vector<int> emptyLabel;
-        pn->AddChild(pLeaf, emptyLabel);
-        // cout << "Add row " << i << " to a non-leaf (col node) " <<
-        // sortedPosList[ci]  << ".\n";
-      }
+
+    // Now add rows into this tree
+    for (int i = 0; i < mat.GetRowNum(); ++i)
+    {
+        int ci = -1;
+        // Find ci: the largest position j in sortedPosList whose column differs from the root in row i
+        for (int j = sortedPosList.size() - 1; j >= 0; j--)
+        {
+            if (mat(i, sortedPosList[j]) != root[sortedPosList[j]])
+            {
+                ci = j;
+                break;
+            }
+        }
+        if (ci < 0)
+        {
+            //    cout << "trouble here.\n";
+            //    YW_ASSERT(false);
+            // This is the same as the root sequence
+            TreeNode *pLeaf = new TreeNode(mat.GetColNum() + i); // Use id=row index + colNum
+            pLeaf->AddNodeValue(i);
+            // also set its label
+            char buf[100], buf1[100];
+            sprintf(buf, "(%d)", i);
+            sprintf(buf1, "%d", i);
+            pLeaf->SetLabel(buf);
+            pLeaf->SetUserLabel(buf1);
+
+            vector<int> emptyLabel;
+            rootNode->AddChild(pLeaf, emptyLabel);
+            //cout << "Add row " << i << " under root node.\n";
+        }
+        else
+        {
+            // Here we always add a node as children. CAUTION: here we may create degree-2 nodes,
+            // we need to cleanup after this
+            // 06/05/05: actually I decided to go another way: put to leaf first, then splits the
+            // multiple labels into different leaves if needed
+            TreeNode *pn = colNodes[ci];
+            if (pn->IsLeaf() == true)
+            {
+                // also set its label
+                char buf[100], buf1[100];
+                sprintf(buf, "(%d)", i);
+                sprintf(buf1, "%d", i);
+                pn->SetLabel(buf);
+                pn->SetUserLabel(buf1);
+
+                // Now attach this row to the existing leaf, HOW?
+                pn->AddNodeValue(i);
+                //cout << "Add row " << i << " to a leaf (col node) " << sortedPosList[ci]  << ".\n";
+            }
+            else
+            {
+                TreeNode *pLeaf = new TreeNode(mat.GetColNum() + i); // Use id=row index + colNum
+                pLeaf->AddNodeValue(i);
+                // also set its label
+                char buf[100], buf1[100];
+                sprintf(buf, "(%d)", i);
+                sprintf(buf1, "%d", i);
+                pLeaf->SetLabel(buf);
+                pLeaf->SetUserLabel(buf1);
+
+                vector<int> emptyLabel;
+                pn->AddChild(pLeaf, emptyLabel);
+                //cout << "Add row " << i << " to a non-leaf (col node) " << sortedPosList[ci]  << ".\n";
+            }
+        }
     }
-  }
 }
 
-void PhylogenyTree ::CleanupTree(const BinaryMatrix &mat) {
-  // 06/05/05: take another route, breakup multiple labels
-  TreeNode *curTN = NULL;
-  stack<TreeNode *> stackNodes;
-  if (rootNode != NULL) {
-    stackNodes.push(rootNode);
-  }
-
-  while (stackNodes.empty() == false) {
-    // Move to next node in stack
-    curTN = stackNodes.top();
-    stackNodes.pop();
-
-    // For a leaf, we try to split it
-    if (curTN->IsLeaf() == true && curTN->nodeValues.size() > 1) {
-      for (int i = 0; i < curTN->nodeValues.size(); ++i) {
-        // Find one to split
-        TreeNode *pLeaf =
-            new TreeNode(mat.GetColNum() +
-                         curTN->nodeValues[i]); // Use id=row index + colNum
-        pLeaf->AddNodeValue(curTN->nodeValues[i]);
-        vector<int> emptyLabel;
-        curTN->AddChild(pLeaf, emptyLabel);
-        // cout << "Spliting row " << curTN->nodeValues[i] << " from leaf " <<
-        // curTN->id  << ".\n";
-
-        // Set the label to the individual nodes values
-        char buf[100], buf1[100];
-        sprintf(buf, "(%d)", curTN->nodeValues[i]);
-        sprintf(buf1, "%d", curTN->nodeValues[i]);
-        pLeaf->SetLabel(buf);
-        pLeaf->SetUserLabel(buf1);
-      }
-
-      // Finally, clear the labels at parent node
-      curTN->nodeValues.clear();
-
-      // We also clear the old label
-      curTN->SetLabel("-");
-      curTN->SetUserLabel("-");
+void PhylogenyTree ::CleanupTree(const BinaryMatrix &mat)
+{
+    // 06/05/05: take another route, breakup multiple labels
+    TreeNode *curTN = NULL;
+    stack<TreeNode *> stackNodes;
+    if (rootNode != NULL)
+    {
+        stackNodes.push(rootNode);
     }
 
-    // push children into stack
-    for (int i = 0; i < curTN->listChildren.size(); ++i) {
-      stackNodes.push(curTN->listChildren[i]);
+    while (stackNodes.empty() == false)
+    {
+        // Move to next node in stack
+        curTN = stackNodes.top();
+        stackNodes.pop();
+
+        // For a leaf, we try to split it
+        if (curTN->IsLeaf() == true && curTN->nodeValues.size() > 1)
+        {
+            for (int i = 0; i < curTN->nodeValues.size(); ++i)
+            {
+                // Find one to split
+                TreeNode *pLeaf = new TreeNode(mat.GetColNum() + curTN->nodeValues[i]); // Use id=row index + colNum
+                pLeaf->AddNodeValue(curTN->nodeValues[i]);
+                vector<int> emptyLabel;
+                curTN->AddChild(pLeaf, emptyLabel);
+                //cout << "Spliting row " << curTN->nodeValues[i] << " from leaf " << curTN->id  << ".\n";
+
+                // Set the label to the individual nodes values
+                char buf[100], buf1[100];
+                sprintf(buf, "(%d)", curTN->nodeValues[i]);
+                sprintf(buf1, "%d", curTN->nodeValues[i]);
+                pLeaf->SetLabel(buf);
+                pLeaf->SetUserLabel(buf1);
+            }
+
+            // Finally, clear the labels at parent node
+            curTN->nodeValues.clear();
+
+            // We also clear the old label
+            curTN->SetLabel("-");
+            curTN->SetUserLabel("-");
+        }
+
+        // push children into stack
+        for (int i = 0; i < curTN->listChildren.size(); ++i)
+        {
+            stackNodes.push(curTN->listChildren[i]);
+        }
     }
-  }
 }
 
-void PhylogenyTree ::RemoveDegreeTwoNodes() {
-  // This function removes all degree-2 nodes
-  // we start from the root and remove any node with degree 2
-  TreeNode *curTN = NULL;
-  stack<TreeNode *> stackNodes;
-  if (rootNode != NULL) {
-    stackNodes.push(rootNode);
-  }
-
-  while (stackNodes.empty() == false) {
-    // Move to next node in stack
-    curTN = stackNodes.top();
-    stackNodes.pop();
-
-    // push children into stack
-    for (int i = 0; i < curTN->listChildren.size(); ++i) {
-      stackNodes.push(curTN->listChildren[i]);
+void PhylogenyTree ::RemoveDegreeTwoNodes()
+{
+    // This function removes all degree-2 nodes
+    // we start from the root and remove any node with degree 2
+    TreeNode *curTN = NULL;
+    stack<TreeNode *> stackNodes;
+    if (rootNode != NULL)
+    {
+        stackNodes.push(rootNode);
     }
 
-    // any node, if it has only a single child, remove the current node
-    if (curTN->IsLeaf() == false && curTN->GetChildrenNum() == 1) {
-      // remove it
-      TreeNode *pcnode = curTN->listChildren[0];
-      TreeNode *ppar = curTN->GetParent();
-
-      vector<int> listLblpn;
-      curTN->GetEdgeLabelsAtBranch(0, listLblpn);
-
-      // change cur's par if exist
-      if (ppar != NULL) {
-        // construct the concatnated label list
-        int pindex = ppar->GetChildIndex(curTN);
-        vector<int> listLblpn2;
-        ppar->GetEdgeLabelsAtBranch(pindex, listLblpn2);
-        AppendIntVec(listLblpn, listLblpn2);
-
-        // here need to maintian the edge labesl
-        ppar->RemoveChild(curTN);
-        // vector<int> labelsEmpty;
-        ppar->AddChild(pcnode, listLblpn);
-      } else {
-        // cur node is root, then change the root
-        YW_ASSERT_INFO(curTN == rootNode, "Must be root");
-        rootNode = pcnode;
-      }
-
-      // set new parent
-      pcnode->SetParent(ppar);
+    while (stackNodes.empty() == false)
+    {
+        // Move to next node in stack
+        curTN = stackNodes.top();
+        stackNodes.pop();
+
+        // push children into stack
+        for (int i = 0; i < curTN->listChildren.size(); ++i)
+        {
+            stackNodes.push(curTN->listChildren[i]);
+        }
+
+        // any node, if it has only a single child, remove the current node
+        if (curTN->IsLeaf() == false && curTN->GetChildrenNum() == 1)
+        {
+            // remove it
+            TreeNode *pcnode = curTN->listChildren[0];
+            TreeNode *ppar = curTN->GetParent();
+
+            vector<int> listLblpn;
+            curTN->GetEdgeLabelsAtBranch(0, listLblpn);
+
+            // change cur's par if exist
+            if (ppar != NULL)
+            {
+                // construct the concatenated label list
+                int pindex = ppar->GetChildIndex(curTN);
+                vector<int> listLblpn2;
+                ppar->GetEdgeLabelsAtBranch(pindex, listLblpn2);
+                AppendIntVec(listLblpn, listLblpn2);
+
+                // here we need to maintain the edge labels
+                ppar->RemoveChild(curTN);
+                //vector<int> labelsEmpty;
+                ppar->AddChild(pcnode, listLblpn);
+            }
+            else
+            {
+                // cur node is root, then change the root
+                YW_ASSERT_INFO(curTN == rootNode, "Must be root");
+                rootNode = pcnode;
+            }
+
+            // set new parent
+            pcnode->SetParent(ppar);
+        }
     }
-  }
 }
 
 // ***************************************************************************
 // Main functions
 // ***************************************************************************
 
-PhylogenyTree ::PhylogenyTree() {}
-
-PhylogenyTree ::~PhylogenyTree() {}
-
-bool PhylogenyTree ::ConsOnBinMatrix(const BinaryMatrix &mat) {
-  // Build tree from binary matrix
-  vector<int> sortedPosList;
-
-  // We first find a good root from data
-  vector<int> root;
-  GetARoot(mat, root);
-
-  // We first sort columns (treated as binary number) by putting the largest
-  // first
-  RadixSortByCol(mat, root, sortedPosList);
-
-  // cout << "the sorted column list is: \n";
-  // DumpIntVec( sortedPosList);
-
-  // Remove Duplicate columns
-  vector<vector<int> > listDuplicates; // used to save for each one in
-                                       // sortedPosList the sites to its right
-                                       // that is duplicate as it, in ORIGINAL
-                                       // numbering
-  RemoveDupSites(mat, sortedPosList, listDuplicates);
-  // cout << "the no duplicate sorted column list is: \n";
-  // DumpIntVec( sortedPosList);
-
-  // Now we compute the Lij and Lj values, from Gusfield's algorithm
-  vector<int *> Lij;
-  for (int i = 0; i < mat.GetRowNum(); ++i) {
-    int *pbuf = new int[sortedPosList.size()];
-    Lij.push_back(pbuf);
-  }
-  vector<int> Lj;
-  ComputeLijLj(mat, root, sortedPosList, Lij, Lj);
-  if (ExamineLijLj(mat, root, sortedPosList, Lij, Lj) == false) {
-    cout << "No tree.\n";
-    return false; // no tree
-  }
-  // Now we start to build tree here
-  BuildTree(mat, root, sortedPosList, listDuplicates, Lj);
-  // cout << "Yes, there is a tree here.\n";
-
-  // Finally, we cleanup
-  CleanupTree(mat);
-
-  // Now we have to do cleanup
-  for (int i = 0; i < Lij.size(); ++i) {
-    delete[] Lij[i];
-  }
-
-  return true;
+PhylogenyTree ::PhylogenyTree()
+{
 }
 
-void PhylogenyTree ::GetLeavesWithMatRowIndices(const set<int> &setMatRows,
-                                                set<TreeNode *> &setLeaves) {
-  // cout << "GetLeavesWithMatRowIndices: setMatRows = ";
-  // DumpIntSet( setMatRows );
-  // given a set of row indices in mat (assume this is one where phylogeny is
-  // constructed)
-  set<string> setLabel;
-  for (set<int>::iterator it = setMatRows.begin(); it != setMatRows.end();
-       ++it) {
-    // use the same naming convention
-    char buf[100];
-    // sprintf(buf, "%d", *it);
-    sprintf(buf, "(%d)", *it);
-    string lbl(buf);
-    setLabel.insert(lbl);
-  }
-  GetLeavesWithLabels(setLabel, setLeaves);
+PhylogenyTree ::~PhylogenyTree()
+{
 }
 
-// ***************************************************************************
+bool PhylogenyTree ::ConsOnBinMatrix(const BinaryMatrix &mat)
+{
+    // Build tree from binary matrix
+    vector<int> sortedPosList;
+
+    // We first find a good root from data
+    vector<int> root;
+    GetARoot(mat, root);
+
+    // We first sort columns (treated as binary number) by putting the largest first
+    RadixSortByCol(mat, root, sortedPosList);
 
-string ConsRootedPerfectPhylogenyFromMat(const BinaryMatrix &matInput,
-                                         bool fEdgeLabel, bool fOneBase) {
-  // constructed tree assuming zero-rooted tree
-  // collect rooted splits
-  set<set<int> > setRootedSplits;
-  map<set<int>, set<int> > mapSplitSites;
-  set<int> setAll1sSites;
-  for (int s = 0; s < matInput.GetColNum(); ++s) {
-    set<int> split;
-    matInput.GetRowsWithAllele(s, 1, split);
-    mapSplitSites[split].insert(s + 1); // let site start from index 1
-    setRootedSplits.insert(split);
-
-    if (split.size() == matInput.GetRowNum()) {
-      setAll1sSites.insert(s);
+    //cout << "the sorted column list is: \n";
+    //DumpIntVec( sortedPosList);
+
+    // Remove Duplicate columns
+    vector<vector<int>> listDuplicates; // used to save for each one in sortedPosList
+                                        // the sites to its right that duplicate it, in ORIGINAL numbering
+    RemoveDupSites(mat, sortedPosList, listDuplicates);
+    //cout << "the no duplicate sorted column list is: \n";
+    //DumpIntVec( sortedPosList);
+
+    // Now we compute the Lij and Lj values, from Gusfield's algorithm
+    vector<int *> Lij;
+    for (int i = 0; i < mat.GetRowNum(); ++i)
+    {
+        int *pbuf = new int[sortedPosList.size()];
+        Lij.push_back(pbuf);
     }
+    vector<int> Lj;
+    ComputeLijLj(mat, root, sortedPosList, Lij, Lj);
+    if (ExamineLijLj(mat, root, sortedPosList, Lij, Lj) == false)
+    {
+        cout << "No tree.\n";
+        return false; // no tree
+    }
+    // Now we start to build tree here
+    BuildTree(mat, root, sortedPosList, listDuplicates, Lj);
+    //cout << "Yes, there is a tree here.\n";
 
-    // cout << "Site " << s << " split: ";
-    // DumpIntSet(split);
-  }
+    // Finally, we cleanup
+    CleanupTree(mat);
 
-  // cout << "Set of all-1 sites: ";
-  // DumpIntSet(setAll1sSites);
+    // Now we have to do cleanup
+    for (int i = 0; i < Lij.size(); ++i)
+    {
+        delete[] Lij[i];
+    }
+
+    return true;
+}
+
+void PhylogenyTree ::GetLeavesWithMatRowIndices(const set<int> &setMatRows, set<TreeNode *> &setLeaves)
+{
+    //cout << "GetLeavesWithMatRowIndices: setMatRows = ";
+    //DumpIntSet( setMatRows );
+    // Given row indices of mat (assumed to be the matrix this phylogeny was built from), look up leaves by their "(row)" labels
+    set<string> setLabel;
+    for (set<int>::iterator it = setMatRows.begin(); it != setMatRows.end(); ++it)
+    {
+        // use the same naming convention
+        char buf[100];
+        //sprintf(buf, "%d", *it);
+        sprintf(buf, "(%d)", *it);
+        string lbl(buf);
+        setLabel.insert(lbl);
+    }
+    GetLeavesWithLabels(setLabel, setLeaves);
+}
+
+// ***************************************************************************
+
+string ConsRootedPerfectPhylogenyFromMat(const BinaryMatrix &matInput, bool fEdgeLabel, bool fOneBase)
+{
+    // constructed tree assuming zero-rooted tree
+    // collect rooted splits
+    set<set<int>> setRootedSplits;
+    map<set<int>, set<int>> mapSplitSites;
+    set<int> setAll1sSites;
+    for (int s = 0; s < matInput.GetColNum(); ++s)
+    {
+        set<int> split;
+        matInput.GetRowsWithAllele(s, 1, split);
+        mapSplitSites[split].insert(s + 1); // let site start from index 1
+        setRootedSplits.insert(split);
+
+        if (split.size() == matInput.GetRowNum())
+        {
+            setAll1sSites.insert(s);
+        }
+
+        //cout << "Site " << s << " split: ";
+        //DumpIntSet(split);
+    }
+
+    //cout << "Set of all-1 sites: ";
+    //DumpIntSet(setAll1sSites);
 
 #if 0
     vector<string> listSiteNames;
@@ -550,61 +590,72 @@ string ConsRootedPerfectPhylogenyFromMat(const BinaryMatrix &matInput,
     }
 #endif
 
-  //
-  PhylogenyTreeBasic tree;
-  CreatePhyTreeWithRootedSplits(tree, matInput.GetRowNum(), setRootedSplits);
-
-  // setup edge labels if needed
-  if (fEdgeLabel) {
-    tree.RemoveEdgeLabels();
     //
-    vector<TreeNode *> listNodes;
-    tree.GetAllNodes(listNodes);
-    for (int i = 0; i < (int)listNodes.size(); ++i) {
-      if (listNodes[i]->IsLeaf()) {
-        continue;
-      }
-      // check all children
-      for (int j = 0; j < listNodes[i]->GetChildrenNum(); ++j) {
-        TreeNode *pChild = listNodes[i]->GetChild(j);
-        set<int> setLeavesUnder;
-        pChild->GetAllLeavesIdUnder(setLeavesUnder);
-        // cout << "The " << j << " th child: leaves under: ";
-        // DumpIntSet(setLeavesUnder);
-        if (mapSplitSites.find(setLeavesUnder) != mapSplitSites.end()) {
-          set<int> setEdgeLbels = mapSplitSites[setLeavesUnder];
-          for (set<int>::iterator it = setEdgeLbels.begin();
-               it != setEdgeLbels.end(); ++it) {
-            listNodes[i]->AddEdgeLabelToChild(j, *it);
-          }
+    PhylogenyTreeBasic tree;
+    CreatePhyTreeWithRootedSplits(tree, matInput.GetRowNum(), setRootedSplits);
+
+    // setup edge labels if needed
+    if (fEdgeLabel)
+    {
+        tree.RemoveEdgeLabels();
+        //
+        vector<TreeNode *> listNodes;
+        tree.GetAllNodes(listNodes);
+        for (int i = 0; i < (int)listNodes.size(); ++i)
+        {
+            if (listNodes[i]->IsLeaf())
+            {
+                continue;
+            }
+            // check all children
+            for (int j = 0; j < listNodes[i]->GetChildrenNum(); ++j)
+            {
+                TreeNode *pChild = listNodes[i]->GetChild(j);
+                set<int> setLeavesUnder;
+                pChild->GetAllLeavesIdUnder(setLeavesUnder);
+                //cout << "The " << j << " th child: leaves under: ";
+                //DumpIntSet(setLeavesUnder);
+                if (mapSplitSites.find(setLeavesUnder) != mapSplitSites.end())
+                {
+                    set<int> setEdgeLbels = mapSplitSites[setLeavesUnder];
+                    for (set<int>::iterator it = setEdgeLbels.begin(); it != setEdgeLbels.end(); ++it)
+                    {
+                        listNodes[i]->AddEdgeLabelToChild(j, *it);
+                    }
+                }
+            }
         }
-      }
     }
-  }
 
-  if (fOneBase) {
-    map<int, int> mapIncLeafLbls;
-    for (int i = 0; i < matInput.GetRowNum(); ++i) {
-      mapIncLeafLbls[i] = i + 1;
+    if (fOneBase)
+    {
+        map<int, int> mapIncLeafLbls;
+        for (int i = 0; i < matInput.GetRowNum(); ++i)
+        {
+            mapIncLeafLbls[i] = i + 1;
+        }
+        ChangeLeafIntLabelOfTree(tree, mapIncLeafLbls);
     }
-    ChangeLeafIntLabelOfTree(tree, mapIncLeafLbls);
-  }
-
-  string res;
-  if (fEdgeLabel == false) {
-    tree.ConsNewick(res);
-  } else {
-    tree.ConsNewickEdgeLabel(res);
-    if (setAll1sSites.size() > 0) {
-      res += ":";
-      // add all-1 labels at the top
-      for (set<int>::iterator it = setAll1sSites.begin();
-           it != setAll1sSites.end(); ++it) {
-        int ss = *it;
-        string strId = std::to_string(ss + 1);
-        res += "#" + strId;
-      }
+
+    string res;
+    if (fEdgeLabel == false)
+    {
+        tree.ConsNewick(res);
+    }
+    else
+    {
+        tree.ConsNewickEdgeLabel(res);
+        if (setAll1sSites.size() > 0)
+        {
+            res += ":";
+            // add all-1 labels at the top
+            for (set<int>::iterator it = setAll1sSites.begin(); it != setAll1sSites.end(); ++it)
+            {
+                int ss = *it;
+                string strId = std::to_string(ss + 1);
+                res += "#" + strId;
+            }
+        }
     }
-  }
-  return res;
+    return res;
 }
diff --git a/trisicell/external/scistree/PhylogenyTree.h b/trisicell/external/scistree/PhylogenyTree.h
index b2043e8..92dde9a 100644
--- a/trisicell/external/scistree/PhylogenyTree.h
+++ b/trisicell/external/scistree/PhylogenyTree.h
@@ -1,24 +1,24 @@
 #ifndef PHYLOGENY_TREE_H
 #define PHYLOGENY_TREE_H
 
-#include <cstdio>
-#include <fstream>
 #include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <vector>
 #include <set>
-#include <stack>
 #include <string>
-#include <vector>
+#include <stack>
 
-#include <cstdio>
-#include <cstdlib>
-#include <ctime>
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
+#include <ctime>
+#include <cstdlib>
+#include <cstdio>
 
-#include "BinaryMatrix.h"
-#include "PhylogenyTreeBasic.h"
 #include "Utils.h"
+#include "PhylogenyTreeBasic.h"
+#include "BinaryMatrix.h"
 
 using namespace std;
 
@@ -27,44 +27,35 @@ using namespace std;
 // the main purpose is to support building from matrix (perfect phylogeny)
 // ***************************************************************************
 
-class PhylogenyTree : public PhylogenyTreeBasic {
+class PhylogenyTree : public PhylogenyTreeBasic
+{
 public:
-  PhylogenyTree(); // Empty tree
-  virtual ~PhylogenyTree();
-  bool ConsOnBinMatrix(const BinaryMatrix &mat); // Build tree from binary
-                                                 // matrix
-  void SetRoot(const vector<int> &rootToSet) { knownRoot = rootToSet; }
-  void RemoveDegreeTwoNodes();
-  static int GetIntLabelFromParenthStr(const string &strLabelWParenth);
-  void GetLeavesWithMatRowIndices(const set<int> &setMatRows,
-                                  set<TreeNode *> &setLeaves);
+    PhylogenyTree(); // Empty tree
+    virtual ~PhylogenyTree();
+    bool ConsOnBinMatrix(const BinaryMatrix &mat); // Build tree from binary matrix
+    void SetRoot(const vector<int> &rootToSet) { knownRoot = rootToSet; }
+    void RemoveDegreeTwoNodes();
+    static int GetIntLabelFromParenthStr(const string &strLabelWParenth);
+    void GetLeavesWithMatRowIndices(const set<int> &setMatRows, set<TreeNode *> &setLeaves);
 
 private:
-  void GetARoot(const BinaryMatrix &mat, vector<int> &root);
-  void RadixSortByCol(const BinaryMatrix &mat, const vector<int> &root,
-                      vector<int> &sortList);
-  void SortByOneBit(int bitPosRow, const BinaryMatrix &mat,
-                    const vector<int> &root, vector<int> &sortList);
-  void RemoveDupSites(const BinaryMatrix &mat, vector<int> &sortedPosList,
-                      vector<vector<int> > &duplicates);
-  void ComputeLijLj(const BinaryMatrix &mat, const vector<int> &root,
-                    const vector<int> &sortedPosList, vector<int *> &Lij,
-                    vector<int> &Lj);
-  bool ExamineLijLj(const BinaryMatrix &mat, const vector<int> &root,
-                    const vector<int> &sortedPosList, const vector<int *> &Lij,
-                    const vector<int> &Lj);
-  void BuildTree(const BinaryMatrix &mat, const vector<int> &root,
-                 const vector<int> &sortedPosList,
-                 const vector<vector<int> > &duplicates, const vector<int> &Lj);
-  void CleanupTree(const BinaryMatrix &mat);
-
-  vector<int> knownRoot;
+    void GetARoot(const BinaryMatrix &mat, vector<int> &root);
+    void RadixSortByCol(const BinaryMatrix &mat, const vector<int> &root, vector<int> &sortList);
+    void SortByOneBit(int bitPosRow, const BinaryMatrix &mat, const vector<int> &root, vector<int> &sortList);
+    void RemoveDupSites(const BinaryMatrix &mat, vector<int> &sortedPosList, vector<vector<int>> &duplicates);
+    void ComputeLijLj(const BinaryMatrix &mat, const vector<int> &root, const vector<int> &sortedPosList,
+                      vector<int *> &Lij, vector<int> &Lj);
+    bool ExamineLijLj(const BinaryMatrix &mat, const vector<int> &root, const vector<int> &sortedPosList,
+                      const vector<int *> &Lij, const vector<int> &Lj);
+    void BuildTree(const BinaryMatrix &mat, const vector<int> &root, const vector<int> &sortedPosList,
+                   const vector<vector<int>> &duplicates, const vector<int> &Lj);
+    void CleanupTree(const BinaryMatrix &mat);
+
+    vector<int> knownRoot;
 };
 
 // ***************************************************************************
 
-string ConsRootedPerfectPhylogenyFromMat(const BinaryMatrix &matInput,
-                                         bool fEdgeLabel,
-                                         bool fOneBase = false);
+string ConsRootedPerfectPhylogenyFromMat(const BinaryMatrix &matInput, bool fEdgeLabel, bool fOneBase = false);
 
 #endif // PHYLOGENY_TREE_H
diff --git a/trisicell/external/scistree/PhylogenyTreeBasic.cpp b/trisicell/external/scistree/PhylogenyTreeBasic.cpp
index db2985c..6ba170c 100644
--- a/trisicell/external/scistree/PhylogenyTreeBasic.cpp
+++ b/trisicell/external/scistree/PhylogenyTreeBasic.cpp
@@ -1,248 +1,274 @@
+#include <stack>
+#include <fstream>
+#include <iostream>
+#include <cstdlib>
 #include "PhylogenyTreeBasic.h"
 #include "Utils3.h"
 #include "Utils4.h"
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
-#include <stack>
 
 // ***************************************************************************
 // The following code is largely based on Gusfield's 1991 Paper
 // ***************************************************************************
 extern void OutputQuotedString(ofstream &outFile, const char *buf);
 
-string GetStringFromId(int id) {
-  char buf[100];
-  sprintf(buf, "%d", id);
-  return buf;
-}
-
-int GetNewickNumLeaves(const string &strNewick, char chSepLeft, char chSepRight,
-                       char midSep) {
-  // the number of leaves of newick is equal to the number of separator char
-  // plus one
-  int res = 0;
-  bool fCount = false; // only count when seeing left sep (
-  for (int i = 0; i < (int)strNewick.length(); ++i) {
-    if (strNewick[i] == chSepLeft) {
-      fCount = true;
-    } else if (strNewick[i] == chSepRight) {
-      if (fCount == true) {
-        // add the last one
-        res++;
-      }
-
-      fCount = false;
-    } else if (strNewick[i] == midSep) {
-      if (fCount == true) {
-        res++;
-      } else {
-        fCount = true;
-      }
-    }
-  }
-  return res;
-}
-
-bool GetTripleType(TreeNode *pn1, TreeNode *pn2, TreeNode *pn3,
-                   pair<pair<TreeNode *, TreeNode *>, TreeNode *> &triple) {
-  TreeNode *pmrca12 = pn1->GetMRCA(pn2);
-  TreeNode *pmrca13 = pn1->GetMRCA(pn3);
-  TreeNode *pmrca23 = pn2->GetMRCA(pn3);
-  //
-  int dummy;
-  if (pmrca13 != pmrca12) {
-    if (pmrca13->IsAncesterOf(pmrca12, dummy) == true) {
-      triple.first.first = pn1;
-      triple.first.second = pn2;
-      triple.second = pn3;
-      return true;
-    } else if (pmrca12->IsAncesterOf(pmrca13, dummy) == true) {
-      triple.first.first = pn1;
-      triple.first.second = pn3;
-      triple.second = pn2;
-      return true;
-    } else {
-      YW_ASSERT_INFO(false, "Impossible");
-    }
-  }
-  // if( pmrca23 != pmrca12 &&  pmrca12->IsAncesterOf(pmrca23, dummy) == true )
-  else if (pmrca23 != pmrca12) {
-    triple.first.first = pn1;
-    triple.first.second = pn2;
-    triple.second = pn3;
-    return true;
-  }
-  // triple not found
-  return false;
+string GetStringFromId(int id) // returns the decimal text form of id
+{
+    char buf[100]; // far more room than any "%d" rendering of an int needs
+    sprintf(buf, "%d", id);
+    return buf; // the char buffer is converted to the string return value
+}
+
+int GetNewickNumLeaves(const string &strNewick, char chSepLeft, char chSepRight, char midSep)
+{
+    // count leaves by looking only at the three separator characters; all other characters are skipped
+    int res = 0; // running leaf count, returned at the end
+    bool fCount = false; // true when the previous separator seen was chSepLeft or a midSep (not chSepRight)
+    for (int i = 0; i < (int)strNewick.length(); ++i)
+    {
+        if (strNewick[i] == chSepLeft)
+        {
+            fCount = true; // whatever follows an opening separator may be a leaf
+        }
+        else if (strNewick[i] == chSepRight)
+        {
+            if (fCount == true)
+            {
+                // the text just before this closing separator is the last leaf of its group
+                res++;
+            }
+
+            fCount = false; // a closed group is not a leaf, so the next separator must not count it
+        }
+        else if (strNewick[i] == midSep)
+        {
+            if (fCount == true)
+            {
+                res++; // the text just before this midSep is a leaf
+            }
+            else
+            {
+                fCount = true; // midSep right after a closed group: count nothing, but re-arm for the next item
+            }
+        }
+    }
+    return res;
+}
+
+bool GetTripleType(TreeNode *pn1, TreeNode *pn2, TreeNode *pn3, pair<pair<TreeNode *, TreeNode *>, TreeNode *> &triple)
+{
+    TreeNode *pmrca12 = pn1->GetMRCA(pn2); // the three pairwise MRCAs drive every decision below
+    TreeNode *pmrca13 = pn1->GetMRCA(pn3);
+    TreeNode *pmrca23 = pn2->GetMRCA(pn3);
+    // on success triple is filled as ((a, b), c) and true is returned; otherwise triple is left untouched
+    int dummy; // second argument required by IsAncesterOf; its value is never read here
+    if (pmrca13 != pmrca12)
+    {
+        if (pmrca13->IsAncesterOf(pmrca12, dummy) == true)
+        {
+            triple.first.first = pn1; // MRCA(pn1,pn2) lies below MRCA(pn1,pn3): store ((pn1, pn2), pn3)
+            triple.first.second = pn2;
+            triple.second = pn3;
+            return true;
+        }
+        else if (pmrca12->IsAncesterOf(pmrca13, dummy) == true)
+        {
+            triple.first.first = pn1; // MRCA(pn1,pn3) lies below MRCA(pn1,pn2): store ((pn1, pn3), pn2)
+            triple.first.second = pn3;
+            triple.second = pn2;
+            return true;
+        }
+        else
+        {
+            YW_ASSERT_INFO(false, "Impossible"); // two distinct MRCAs involving pn1 are expected to be ancestor-related
+        }
+    }
+    //if( pmrca23 != pmrca12 &&  pmrca12->IsAncesterOf(pmrca23, dummy) == true )
+    else if (pmrca23 != pmrca12) // reached only when pmrca12 == pmrca13
+    {
+        triple.first.first = pn1; // NOTE(review): pn2/pn3 look like the closer pair here, yet ((pn1, pn2), pn3) is stored - confirm intent
+        triple.first.second = pn2;
+        triple.second = pn3;
+        return true;
+    }
+    // triple not found (all three MRCAs coincide, or control continued past the assertion above)
+    return false;
 }
 
 // different from Marginal tree, we allow mulfurcating trees here
 // this can be convenient in some cases
-bool ReadinPhyloTreesNewick(ifstream &inFile, int numLeaves,
-                            vector<PhylogenyTreeBasic *> &treePtrList,
-                            TaxaMapper *pTMapper) {
-  // NOTE: RETURN TRUE IF NO LABEL ADJUSTMENT IS DONE
-  // RETURN FALSE IF WE SWITCHED LABEL BY DECREASING BY ONE
-  // figure out leave num
-  bool fNoChange = true;
-  int nLvs = numLeaves;
-
-  // read marginal trees in newick format
-  // here there is no preamble, one line per tree
-  while (inFile.eof() == false) {
-    // ensure the first char is '('; otherwise stop
-    char ch;
-    inFile >> ch;
-    inFile.putback(ch);
-    if (ch != '(') {
-      break;
-    }
-
-    string treeNewick;
-    inFile >> treeNewick;
-    if (treeNewick.size() == 0) {
-      break;
-    }
-    // cout << "newick tree = " << treeNewick << endl;
+bool ReadinPhyloTreesNewick(ifstream &inFile, int numLeaves, vector<PhylogenyTreeBasic *> &treePtrList, TaxaMapper *pTMapper)
+{
+    // NOTE: RETURN TRUE IF NO LABEL ADJUSTMENT IS DONE
+    // RETURN FALSE IF WE SWITCHED LABEL BY DECREASING BY ONE
+    // NOTE(review): as written no label switch happens any more, see fNoChange below
+    bool fNoChange = true; // never reassigned (the only assignment is commented out), so true is always returned
+    int nLvs = numLeaves; // overwritten per tree below; afterwards only referenced from commented-out code
+
+    // read marginal trees in newick format
+    // here there is no preamble, one whitespace-delimited newick string per tree
+    while (inFile.eof() == false)
+    {
+        // ensure the first char is '('; otherwise stop
+        char ch;
+        inFile >> ch; // NOTE(review): if this extraction fails (e.g. only whitespace is left) ch looks unset when used below - confirm
+        inFile.putback(ch); // the peeked character is returned to the stream for the string read below
+        if (ch != '(')
+        {
+            break;
+        }
 
-    //#if 0
-    // update numleaves
-    multiset<string> setLabels;
-    NewickUtils ::RetrieveLabelSet(treeNewick, setLabels);
+        string treeNewick;
+        inFile >> treeNewick; // reads up to the next whitespace
+        if (treeNewick.size() == 0)
+        {
+            break;
+        }
+        //cout << "newick tree = " << treeNewick << endl;
+
+        //#if 0
+        // update numleaves from the labels found in this newick string
+        multiset<string> setLabels;
+        NewickUtils ::RetrieveLabelSet(treeNewick, setLabels);
 #if 0
 for(multiset<string> :: iterator it22 = setLabels.begin(); it22 != setLabels.end(); ++it22)
 {
 cout << "Label found: " << *it22 << endl;
 }
 #endif
-    nLvs = setLabels.size();
-    //#endif
-    //
-    PhylogenyTreeBasic *pphTree = new PhylogenyTreeBasic;
-    // if( fDup == false )
-    //{
-    pphTree->ConsOnNewick(treeNewick, -1, false, pTMapper);
-    // cout << "Done phylogenetic tree construction...\n";
-    // pphTree->OutputGML("tmp.gml");
-    //}
-    // else
-    //{
-    //	phTree.ConsOnNewickDupLabels(treeNewick, pTMapper);
-    //}
+        nLvs = setLabels.size(); // multiset size, so repeated labels are each counted
+        //#endif
+        //
+        PhylogenyTreeBasic *pphTree = new PhylogenyTreeBasic; // not deleted here; the pointer is appended to treePtrList below
+        //if( fDup == false )
+        //{
+        pphTree->ConsOnNewick(treeNewick, -1, false, pTMapper);
+        //cout << "Done phylogenetic tree construction...\n";
+        //pphTree->OutputGML("tmp.gml");
+        //}
+        //else
+        //{
+        //	phTree.ConsOnNewickDupLabels(treeNewick, pTMapper);
+        //}
 
-    if (pTMapper != NULL) {
-      pTMapper->SetInitialized(true);
-    }
-    // string strTr;
-    // pphTree->ConsNewick(strTr);
-    // cout << "After reconstruction: strTr = " << strTr << endl;
-    // see if zero is in, if not, must have 1 and decrease by 1
-    set<int> lvids;
-    pphTree->GetLeaveIds(lvids);
-    // cout << "lvids : ";
-    // DumpIntSet( lvids );
-    int idInternal = lvids.size();
-    YW_ASSERT_INFO(lvids.find(0) != lvids.end(),
-                   "Must adjust leaf label first (to start with 0)");
+        if (pTMapper != NULL)
+        {
+            pTMapper->SetInitialized(true); // done after every tree that is read
+        }
+        //string strTr;
+        //pphTree->ConsNewick(strTr);
+        //cout << "After reconstruction: strTr = " << strTr << endl;
+        // leaf id 0 must be present; this is asserted below rather than adjusted
+        set<int> lvids;
+        pphTree->GetLeaveIds(lvids);
+        //cout << "lvids : ";
+        //DumpIntSet( lvids );
+        int idInternal = lvids.size(); // first id handed to an internal node
+        YW_ASSERT_INFO(lvids.find(0) != lvids.end(), "Must adjust leaf label first (to start with 0)");
+
+        //	YW_ASSERT_INFO( lvids.find(1) != lvids.end(), "Wrong" );
+
+        // renumber internal nodes in iterator order, continuing after lvids.size(); leaf ids are kept
+        PhylogenyTreeIterator itorTree(*pphTree);
+        itorTree.Init();
+        //pphTree->InitPostorderWalk();
+        while (itorTree.IsDone() == false)
+        {
+            //				TreeNode *pn = pphTree->NextPostorderWalk( ) ;
+            TreeNode *pn = itorTree.GetCurrNode();
+            itorTree.Next(); // advance first, so the early break below cannot skip the step
+            if (pn == NULL)
+            {
+                break; // done with all nodes
+            }
+            if (pn->IsLeaf() == false)
+            {
+                pn->SetID(idInternal++);
+            }
+        }
 
-    //	YW_ASSERT_INFO( lvids.find(1) != lvids.end(), "Wrong" );
+        // mark the change (disabled, which is why fNoChange stays true)
+        //	fNoChange = false;
 
-    // decrease by one
-    PhylogenyTreeIterator itorTree(*pphTree);
-    itorTree.Init();
-    // pphTree->InitPostorderWalk();
-    while (itorTree.IsDone() == false) {
-      //				TreeNode *pn =
-      // pphTree->NextPostorderWalk( ) ;
-      TreeNode *pn = itorTree.GetCurrNode();
-      itorTree.Next();
-      if (pn == NULL) {
-        break; // done with all nodes
-      }
-      if (pn->IsLeaf() == false) {
-        pn->SetID(idInternal++);
-      }
-    }
-
-    // mark the change
-    //	fNoChange = false;
-
-    vector<int> nidsList, nparsList;
-    pphTree->GetNodeParInfo(nidsList, nparsList);
-    // phTree.GetNodeParInfoNew(nidsList, nparsList);
-    // phTree.GetNodeParInfo(nidsList, nparsList);
-    // if( nLvs <= 0 )
-    //{
-    // string strTrNW;
-    // pphTree->ConsNewick(strTrNW);
-    // cout << "strTrNW: " << strTrNW << endl;
-    treePtrList.push_back(pphTree);
-    // cout << "Newick format of this marginal tree: ";
-    // cout << tree.GetNewick() << endl;
-  }
-  return fNoChange;
+        vector<int> nidsList, nparsList; // passed to the call below but not read afterwards in this function
+        pphTree->GetNodeParInfo(nidsList, nparsList);
+        //phTree.GetNodeParInfoNew(nidsList, nparsList);
+        //phTree.GetNodeParInfo(nidsList, nparsList);
+        //if( nLvs <= 0 )
+        //{
+        //string strTrNW;
+        //pphTree->ConsNewick(strTrNW);
+        //cout << "strTrNW: " << strTrNW << endl;
+        treePtrList.push_back(pphTree); // presumably the caller becomes responsible for deleting it - confirm
+        //cout << "Newick format of this marginal tree: ";
+        //cout << tree.GetNewick() << endl;
+    }
+    return fNoChange;
 }
 
 // create a random tree
-void InitRandomTree(PhylogenyTreeBasic &treeToInit, int numTaxa, int rndSeed) {
-  //
-  if (rndSeed >= 0) {
-    InitRandom(rndSeed);
-  }
-  // create leaves first
-  int idToUseNext = 0;
-  vector<TreeNode *> listActiveNodes;
-  for (int i = 0; i < numTaxa; ++i) {
-    //
-    TreeNode *pLeaf = new TreeNode(idToUseNext++);
-    // label it
-    pLeaf->SetLabel(GetStringFromId(i));
-    listActiveNodes.push_back(pLeaf);
-  }
-  // now create random coalescence
-  while (listActiveNodes.size() > 1) {
-    // get two random nodes and coalesce them
-    int rndpos1 = (int)(listActiveNodes.size() * GetRandFraction());
-    YW_ASSERT_INFO(rndpos1 < (int)listActiveNodes.size(), "overflow");
-    TreeNode *node1 = listActiveNodes[rndpos1];
-    RemoveVecElementAt(listActiveNodes, rndpos1);
-    int rndpos2 = (int)(listActiveNodes.size() * GetRandFraction());
-    YW_ASSERT_INFO(rndpos2 < (int)listActiveNodes.size(), "overflow");
-    TreeNode *node2 = listActiveNodes[rndpos2];
-    RemoveVecElementAt(listActiveNodes, rndpos2);
-    //
-    TreeNode *pnodeNew = new TreeNode(idToUseNext++);
-    vector<int> listEmpty;
-    pnodeNew->AddChild(node1, listEmpty);
-    pnodeNew->AddChild(node2, listEmpty);
-    // add this node to list of active nodes
-    listActiveNodes.push_back(pnodeNew);
-  }
-  // now here is the root
-  YW_ASSERT_INFO(listActiveNodes.size() == 1, "Only one root");
-  treeToInit.SetRoot(listActiveNodes[0]);
-}
-
-void CreatePhyTreeWithRootedSplits(PhylogenyTreeBasic &treeToProc, int numTaxa,
-                                   const set<set<int> > &setGivenSplits) {
-  // create a phy tree with the given rooted splits
-  // ASSUME: taxa starts from 0 to numTaxa-1
-  // result can be a non-binary tree
-  // first order them
-  vector<set<set<int> > > listGivenSplits(numTaxa + 1);
-  for (set<set<int> >::const_iterator it = setGivenSplits.begin();
-       it != setGivenSplits.end(); ++it) {
-    int sz = it->size();
-    listGivenSplits[sz].insert(*it);
-  }
-  // if the whole set is not in, add it so that we have a single lin in the end
-  if (listGivenSplits[numTaxa].size() == 0) {
+void InitRandomTree(PhylogenyTreeBasic &treeToInit, int numTaxa, int rndSeed)
+{
     //
-    set<int> sall;
-    PopulateSetWithInterval(sall, 0, numTaxa - 1);
-    listGivenSplits[numTaxa].insert(sall);
-  }
+    if (rndSeed >= 0) // a negative seed skips the InitRandom call
+    {
+        InitRandom(rndSeed);
+    }
+    // create leaves first: ids 0..numTaxa-1, each labelled with the text form of its index
+    int idToUseNext = 0; // shared id counter; internal nodes continue after the leaves
+    vector<TreeNode *> listActiveNodes; // nodes that do not have a parent yet
+    for (int i = 0; i < numTaxa; ++i)
+    {
+        //
+        TreeNode *pLeaf = new TreeNode(idToUseNext++);
+        // label it
+        pLeaf->SetLabel(GetStringFromId(i));
+        listActiveNodes.push_back(pLeaf);
+    }
+    // now create random coalescence: each round replaces two active nodes by one new parent
+    while (listActiveNodes.size() > 1)
+    {
+        // get two random nodes and coalesce them
+        int rndpos1 = (int)(listActiveNodes.size() * GetRandFraction()); // assumes GetRandFraction() < 1 - checked by the assert below
+        YW_ASSERT_INFO(rndpos1 < (int)listActiveNodes.size(), "overflow");
+        TreeNode *node1 = listActiveNodes[rndpos1];
+        RemoveVecElementAt(listActiveNodes, rndpos1); // taken out before the second draw, so the two picks differ
+        int rndpos2 = (int)(listActiveNodes.size() * GetRandFraction());
+        YW_ASSERT_INFO(rndpos2 < (int)listActiveNodes.size(), "overflow");
+        TreeNode *node2 = listActiveNodes[rndpos2];
+        RemoveVecElementAt(listActiveNodes, rndpos2);
+        //
+        TreeNode *pnodeNew = new TreeNode(idToUseNext++);
+        vector<int> listEmpty; // passed as the second AddChild argument for both children
+        pnodeNew->AddChild(node1, listEmpty);
+        pnodeNew->AddChild(node2, listEmpty);
+        // add this node to list of active nodes
+        listActiveNodes.push_back(pnodeNew);
+    }
+    // now here is the root; NOTE(review): with numTaxa <= 0 the list is empty and this check cannot hold
+    YW_ASSERT_INFO(listActiveNodes.size() == 1, "Only one root");
+    treeToInit.SetRoot(listActiveNodes[0]);
+}
+
+void CreatePhyTreeWithRootedSplits(PhylogenyTreeBasic &treeToProc, int numTaxa, const set<set<int>> &setGivenSplits)
+{
+    // create a phy tree with the given rooted splits
+    // ASSUME: taxa starts from 0 to numTaxa-1
+    // result can be a non-binary tree
+    // first order them
+    vector<set<set<int>>> listGivenSplits(numTaxa + 1);
+    for (set<set<int>>::const_iterator it = setGivenSplits.begin(); it != setGivenSplits.end(); ++it)
+    {
+        int sz = it->size();
+        listGivenSplits[sz].insert(*it);
+    }
+    // if the whole set is not in, add it so that we have a single lin in the end
+    if (listGivenSplits[numTaxa].size() == 0)
+    {
+        //
+        set<int> sall;
+        PopulateSetWithInterval(sall, 0, numTaxa - 1);
+        listGivenSplits[numTaxa].insert(sall);
+    }
 #if 0
 cout << "Set of given splits: ";
 for(int i=0; i<(int)listGivenSplits.size(); ++i)
@@ -257,187 +283,186 @@ DumpIntSet( *it );
 }
 #endif
 
-  // active list of lineages indexed by their set
-  map<set<int>, TreeNode *> mapActiveLins;
-  // initially all the leaf lins
-  int idToUse = 0;
-  for (int i = 0; i < numTaxa; ++i) {
-    TreeNode *pLeaf = new TreeNode(idToUse++);
-    set<int> sint;
-    sint.insert(i);
-    string strLbl = GetStringFromId(i);
-    pLeaf->SetLabel(strLbl);
-    mapActiveLins.insert(map<set<int>, TreeNode *>::value_type(sint, pLeaf));
-  }
-  // now scan through the entire list
-  for (int k = 2; k < (int)listGivenSplits.size(); ++k) {
-    // start from 2 so that avoid trivial sets
-    if (listGivenSplits[k].size() == 0) {
-      continue;
-    }
-    // for each input list, find those lins that is contained within the
-    // clusters
-    for (set<set<int> >::iterator it2 = listGivenSplits[k].begin();
-         it2 != listGivenSplits[k].end(); ++it2) {
-      // each subset corresponds to a new internal node
-      TreeNode *pnode = new TreeNode(idToUse++);
-
-      // cout << "list of active lins: ";
-      // for( map< set<int>, TreeNode *> :: iterator iggg =
-      // mapActiveLins.begin(); iggg != mapActiveLins.end(); ++iggg )
-      //{
-      //    DumpIntSet( iggg->first);
-      //}
-      // cout << "Considering given split: ";
-      // DumpIntSet( *it2 );
-      // find the proper node in the previous set
-      set<set<int> > setMatached;
-      int szTot = 0;
-      for (map<set<int>, TreeNode *>::iterator it3 = mapActiveLins.begin();
-           it3 != mapActiveLins.end(); ++it3) {
+    // active list of lineages indexed by their set
+    map<set<int>, TreeNode *> mapActiveLins;
+    // initially all the leaf lins
+    int idToUse = 0;
+    for (int i = 0; i < numTaxa; ++i)
+    {
+        TreeNode *pLeaf = new TreeNode(idToUse++);
+        set<int> sint;
+        sint.insert(i);
+        string strLbl = GetStringFromId(i);
+        pLeaf->SetLabel(strLbl);
+        mapActiveLins.insert(map<set<int>, TreeNode *>::value_type(sint, pLeaf));
+    }
+    // now scan through the entire list
+    for (int k = 2; k < (int)listGivenSplits.size(); ++k)
+    {
+        // start from 2 so that avoid trivial sets
+        if (listGivenSplits[k].size() == 0)
+        {
+            continue;
+        }
+        // for each input list, find those lins that is contained within the clusters
+        for (set<set<int>>::iterator it2 = listGivenSplits[k].begin(); it2 != listGivenSplits[k].end(); ++it2)
+        {
+            // each subset corresponds to a new internal node
+            TreeNode *pnode = new TreeNode(idToUse++);
+
+            //cout << "list of active lins: ";
+            //for( map< set<int>, TreeNode *> :: iterator iggg = mapActiveLins.begin(); iggg != mapActiveLins.end(); ++iggg )
+            //{
+            //    DumpIntSet( iggg->first);
+            //}
+            //cout << "Considering given split: ";
+            //DumpIntSet( *it2 );
+            // find the proper node in the previous set
+            set<set<int>> setMatached;
+            int szTot = 0;
+            for (map<set<int>, TreeNode *>::iterator it3 = mapActiveLins.begin(); it3 != mapActiveLins.end(); ++it3)
+            {
+                //
+                //cout <<  "treat this active lineage: ";
+                //DumpIntSet( it3->first );
+                if (IsSetContainer(*it2, it3->first) == true)
+                {
+                    //cout << "yes, continer!\n";
+                    //
+                    setMatached.insert(it3->first);
+                    szTot += it3->first.size();
+                    //
+                    vector<int> sempty;
+                    pnode->AddChild(it3->second, sempty);
+                }
+            }
+            YW_ASSERT_INFO(szTot == (int)it2->size(), "Size: mismatch1");
+            // remove the old ones and add the newly created one
+            for (set<set<int>>::iterator it4 = setMatached.begin(); it4 != setMatached.end(); ++it4)
+            {
+                mapActiveLins.erase(*it4);
+            }
+            mapActiveLins.insert(map<set<int>, TreeNode *>::value_type(*it2, pnode));
+        }
+    }
+    YW_ASSERT_INFO(mapActiveLins.size() == 1, "Wrong: must have only a single lineage left");
+    treeToProc.SetRoot(mapActiveLins.begin()->second);
+
+    //string strNW;
+    //treeToProc.ConsNewick(strNW);
+    //cout << "Result of createtreebyrootedplits: " << strNW << endl;
+    //cout << "SetGivenSplits: \n";
+    //for(set<set<int> > :: iterator it = setGivenSplits.begin(); it != setGivenSplits.end(); ++it)
+    //{
+    //DumpIntSet( *it);
+    //}
+    //cout << "numTaxa: " << numTaxa << endl;
+}
+
+void DumpAllSubtreesWithTaxaSize(const vector<PhylogenyTreeBasic *> &listPtrGTrees, int numTaxonSubtree, const char *fileNameOut)
+{
+    ofstream outfile(fileNameOut); // NOTE(review): the stream is not checked for a failed open
+
+    // for every tree, write one newick line per subset of numTaxonSubtree distinct leaf labels
+    // (if the tree contains fewer distinct labels than this number, the subset size is clamped to that count)
+    for (int tr = 0; tr < (int)listPtrGTrees.size(); ++tr)
+    {
         //
-        // cout <<  "treat this active lineage: ";
-        // DumpIntSet( it3->first );
-        if (IsSetContainer(*it2, it3->first) == true) {
-          // cout << "yes, continer!\n";
-          //
-          setMatached.insert(it3->first);
-          szTot += it3->first.size();
-          //
-          vector<int> sempty;
-          pnode->AddChild(it3->second, sempty);
-        }
-      }
-      YW_ASSERT_INFO(szTot == (int)it2->size(), "Size: mismatch1");
-      // remove the old ones and add the newly created one
-      for (set<set<int> >::iterator it4 = setMatached.begin();
-           it4 != setMatached.end(); ++it4) {
-        mapActiveLins.erase(*it4);
-      }
-      mapActiveLins.insert(map<set<int>, TreeNode *>::value_type(*it2, pnode));
-    }
-  }
-  YW_ASSERT_INFO(mapActiveLins.size() == 1,
-                 "Wrong: must have only a single lineage left");
-  treeToProc.SetRoot(mapActiveLins.begin()->second);
-
-  // string strNW;
-  // treeToProc.ConsNewick(strNW);
-  // cout << "Result of createtreebyrootedplits: " << strNW << endl;
-  // cout << "SetGivenSplits: \n";
-  // for(set<set<int> > :: iterator it = setGivenSplits.begin(); it !=
-  // setGivenSplits.end(); ++it)
-  //{
-  // DumpIntSet( *it);
-  //}
-  // cout << "numTaxa: " << numTaxa << endl;
-}
-
-void DumpAllSubtreesWithTaxaSize(
-    const vector<PhylogenyTreeBasic *> &listPtrGTrees, int numTaxonSubtree,
-    const char *fileNameOut) {
-  ofstream outfile(fileNameOut);
-
-  // dump out subtrees with certain number of taxa (if the tree contains fewer
-  // than this number, just dump out the entire tree)
-  for (int tr = 0; tr < (int)listPtrGTrees.size(); ++tr) {
-    //
-    set<string> listLeafLabelsSet;
-    vector<string> listLeafLabels, listLeafLabelsSetDistinct;
-    listPtrGTrees[tr]->GetAllLeafLabeles(listLeafLabels);
-    PopulateSetByVecGen(listLeafLabelsSet, listLeafLabels);
-    PopulateVecBySetGen(listLeafLabelsSetDistinct, listLeafLabelsSet);
+        set<string> listLeafLabelsSet;
+        vector<string> listLeafLabels, listLeafLabelsSetDistinct;
+        listPtrGTrees[tr]->GetAllLeafLabeles(listLeafLabels);
+        PopulateSetByVecGen(listLeafLabelsSet, listLeafLabels); // presumably vector -> set, dropping repeated labels - confirm
+        PopulateVecBySetGen(listLeafLabelsSetDistinct, listLeafLabelsSet); // presumably set -> vector, so labels can be indexed
+
+        // clamp the subset size to the number of distinct labels of this tree
+        int numSubsetSz = numTaxonSubtree;
+        if (numSubsetSz > (int)listLeafLabelsSetDistinct.size())
+        {
+            numSubsetSz = listLeafLabelsSetDistinct.size();
+        }
+
+        // find all subsets; posvec holds the positions (into listLeafLabelsSetDistinct) of the current one
+        vector<int> posvec;
+        GetFirstCombo(numSubsetSz, (int)listLeafLabelsSetDistinct.size(), posvec);
+        while (true)
+        {
+            set<string> setTaxaStep; // labels selected by the current posvec
+            for (int i = 0; i < (int)posvec.size(); ++i)
+            {
+                setTaxaStep.insert(listLeafLabelsSetDistinct[posvec[i]]);
+            }
+
+            // build a temporary tree for these labels, write its newick form on one line, then free it
+            PhylogenyTreeBasic *ptreeNew = new PhylogenyTreeBasic;
+            listPtrGTrees[tr]->CreatePhyTreeFromLeavesWithLabels(setTaxaStep, *ptreeNew, true);
+            string nwTree;
+            ptreeNew->ConsNewick(nwTree);
+            outfile << nwTree << endl;
+            delete ptreeNew;
+
+            if (GetNextCombo(numSubsetSz, (int)listLeafLabelsSetDistinct.size(), posvec) == false) // false ends the enumeration
+            {
+                break;
+            }
+        }
+    }
+
+    outfile.close();
+}
 
+void DumpAllSubtreesWithBoundedSize(const vector<PhylogenyTreeBasic *> &listPtrGTrees, int maxSzSubtree, int maxIdentSubtreeSz, const char *fileNameOut)
+{
     //
-    int numSubsetSz = numTaxonSubtree;
-    if (numSubsetSz > (int)listLeafLabelsSetDistinct.size()) {
-      numSubsetSz = listLeafLabelsSetDistinct.size();
-    }
-
-    // find all subsets
-    vector<int> posvec;
-    GetFirstCombo(numSubsetSz, (int)listLeafLabelsSetDistinct.size(), posvec);
-    while (true) {
-      set<string> setTaxaStep;
-      for (int i = 0; i < (int)posvec.size(); ++i) {
-        setTaxaStep.insert(listLeafLabelsSetDistinct[posvec[i]]);
-      }
-
-      //
-      PhylogenyTreeBasic *ptreeNew = new PhylogenyTreeBasic;
-      listPtrGTrees[tr]->CreatePhyTreeFromLeavesWithLabels(setTaxaStep,
-                                                           *ptreeNew, true);
-      string nwTree;
-      ptreeNew->ConsNewick(nwTree);
-      outfile << nwTree << endl;
-      delete ptreeNew;
-
-      if (GetNextCombo(numSubsetSz, (int)listLeafLabelsSetDistinct.size(),
-                       posvec) == false) {
-        break;
-      }
-    }
-  }
-
-  outfile.close();
-}
-
-void DumpAllSubtreesWithBoundedSize(
-    const vector<PhylogenyTreeBasic *> &listPtrGTrees, int maxSzSubtree,
-    int maxIdentSubtreeSz, const char *fileNameOut) {
-  //
-  // cout << "DumpAllSubtreesWithBoundedSize: maxSzSubtree: " << maxSzSubtree <<
-  // ", maxIdentSubtreeSz: " << maxIdentSubtreeSz << ", filenameOut: " <<
-  // fileNameOut << endl;
-  // dump all subtrees with at most maxSzSubtree leaves into a file (that is,
-  // breaking trees into pieces) in order to avoid issues that large subtrees
-  // with identical labels, we first shrink such subtree within the size (if
-  // exists) e.g. maxIdentSubtreeSz = 5 and maxSzSubtree = 10 YW: 12/09/15: in
-  // case of a non-binary tree, we may have multiple subtrees as siblings; if
-  // this is the case, output each pair of subtrees YW: 12/10/15: don't output
-  // trees with only two siblings
-  ofstream outfile(fileNameOut);
-
-  // dump out subtrees with certain number of taxa (if the tree contains fewer
-  // than this number, just dump out the entire tree)
-  bool fTreeOut = false;
-  for (int tr = 0; tr < (int)listPtrGTrees.size(); ++tr) {
-    // cout << "Processing tree: " << tr << endl;
-    // create a new tree where identical subtrees match what we want
-    PhylogenyTreeBasic *ptreeWork =
-        ConsPhyTreeShrinkIdentSubtrees(listPtrGTrees[tr], maxIdentSubtreeSz);
-    // cout << "tree working: ";
-    // ptreeWork->Dump();
-    // find all subtrees that are no bigger than the desired ones
-    set<TreeNode *> setSTRoots;
-    ptreeWork->GetSubtreesWithMaxSize(setSTRoots, maxSzSubtree);
-    // cout << "Number of subtrees: " << setSTRoots.size() << endl;
-
-    // find any missing
-    // set<string> setLabelsPresent;
-    // PhylogenyTreeBasic :: FindAllLabelsInSubtrees(setSTRoots,
-    // setLabelsPresent); set<string> setLabelsMiss;
-    // ptreeWork->GetRoot()->GetAllDistinctLeafLabeles(setLabelsMiss);
-    // SubtractSetsGen(setLabelsMiss, setLabelsPresent);
-
-    // list of all subtrees that are uniform
-    set<TreeNode *> setSTUniform;
-    for (set<TreeNode *>::iterator it = setSTRoots.begin();
-         it != setSTRoots.end(); ++it) {
-      //
-      set<string> strLblsStep;
-      (*it)->GetAllDistinctLeafLabeles(strLblsStep);
-
-      if (strLblsStep.size() == 1) {
-        setSTUniform.insert(*it);
-      }
-    }
-    // cout << "Number of uniform subtrees: " << setSTUniform.size() << endl;
-
-    // output each subtree one by one
-    PhylogenyTreeBasic *ptreeNew = new PhylogenyTreeBasic;
-
-    while (setSTRoots.size() >= 1) {
+    //cout << "DumpAllSubtreesWithBoundedSize: maxSzSubtree: " << maxSzSubtree << ", maxIdentSubtreeSz: " << maxIdentSubtreeSz << ", filenameOut: " << fileNameOut << endl;
+    // dump all subtrees with at most maxSzSubtree leaves into a file (that is, breaking trees into pieces)
+    // in order to avoid issues that large subtrees with identical labels, we first shrink such subtree within the size (if exists)
+    // e.g. maxIdentSubtreeSz = 5 and maxSzSubtree = 10
+    // YW: 12/09/15: in case of a non-binary tree, we may have multiple subtrees as siblings; if this is the case, output each pair of subtrees
+    // YW: 12/10/15: don't output trees with only two siblings
+    ofstream outfile(fileNameOut);
+
+    // dump out subtrees with certain number of taxa (if the tree contains fewer than this number, just dump out
+    // the entire tree)
+    bool fTreeOut = false;
+    for (int tr = 0; tr < (int)listPtrGTrees.size(); ++tr)
+    {
+        //cout << "Processing tree: " << tr << endl;
+        // create a new tree where identical subtrees match what we want
+        PhylogenyTreeBasic *ptreeWork = ConsPhyTreeShrinkIdentSubtrees(listPtrGTrees[tr], maxIdentSubtreeSz);
+        //cout << "tree working: ";
+        //ptreeWork->Dump();
+        // find all subtrees that are no bigger than the desired ones
+        set<TreeNode *> setSTRoots;
+        ptreeWork->GetSubtreesWithMaxSize(setSTRoots, maxSzSubtree);
+        //cout << "Number of subtrees: " << setSTRoots.size() << endl;
+
+        // find any missing
+        //set<string> setLabelsPresent;
+        //PhylogenyTreeBasic :: FindAllLabelsInSubtrees(setSTRoots, setLabelsPresent);
+        //set<string> setLabelsMiss;
+        //ptreeWork->GetRoot()->GetAllDistinctLeafLabeles(setLabelsMiss);
+        //SubtractSetsGen(setLabelsMiss, setLabelsPresent);
+
+        // list of all subtrees that are uniform
+        set<TreeNode *> setSTUniform;
+        for (set<TreeNode *>::iterator it = setSTRoots.begin(); it != setSTRoots.end(); ++it)
+        {
+            //
+            set<string> strLblsStep;
+            (*it)->GetAllDistinctLeafLabeles(strLblsStep);
+
+            if (strLblsStep.size() == 1)
+            {
+                setSTUniform.insert(*it);
+            }
+        }
+        //cout << "Number of uniform subtrees: " << setSTUniform.size() << endl;
+
+        // output each subtree one by one
+        PhylogenyTreeBasic *ptreeNew = new PhylogenyTreeBasic;
+
+        while (setSTRoots.size() >= 1)
+        {
 #if 0
 cout << "Start of each iteration: tree is: ";
 ptreeWork->Dump();
@@ -450,138 +475,145 @@ cout << endl;
 }
 #endif
 
-      TreeNode *pnSTRootCurr = NULL;
-      set<TreeNode *> setSTToRemove;
+            TreeNode *pnSTRootCurr = NULL;
+            set<TreeNode *> setSTToRemove;
 
-      // rule: if there is a non-uniform subtree, output it
-      for (set<TreeNode *>::iterator itg = setSTRoots.begin();
-           itg != setSTRoots.end(); ++itg) {
-        //
-        if (setSTUniform.find(*itg) == setSTUniform.end()) {
-          //
-          pnSTRootCurr = *itg;
-          break;
-        }
-      }
-      if (pnSTRootCurr == NULL) {
-        // if no non-uniform subtrees are found, find a sibling pairs of
-        // subtrees and take the whole subtree to output YW: need to be careful;
-        // I don't want to have left-over
-        set<TreeNode *> ppSibs;
-        bool fres =
-            PhylogenyTreeBasic ::GetSiblingsNodesFrom(setSTRoots, ppSibs);
-        YW_ASSERT_INFO(fres == true, "Fail to find silblings");
-        pnSTRootCurr = (*ppSibs.begin())->GetParent();
-        while (true) {
-          // find out how many subtrees covered if taking this
-          set<TreeNode *> setSTCoveredStep;
-          PhylogenyTreeBasic ::FindDescendentsOfNodeWithin(
-              pnSTRootCurr, setSTRoots, setSTCoveredStep);
-          // if there are at least two left, use it or we have reached the root
-          if ((int)setSTCoveredStep.size() + 1 < (int)setSTRoots.size() ||
-              pnSTRootCurr == ptreeWork->GetRoot()) {
-            //
-            break;
-          } else {
-            // move up
-            pnSTRootCurr = pnSTRootCurr->GetParent();
-          }
-        }
-
-      } else {
-        // if there are only one leftover and it is uniform one, output all the
-        // tree
-        if (setSTRoots.size() == 2 && setSTUniform.size() > 0) {
-          //
-          pnSTRootCurr = ptreeWork->GetRoot();
-        }
-      }
-
-      // remove any subtrees that are descendent of the output subtree
-      // just output it
-      YW_ASSERT_INFO(pnSTRootCurr != NULL, "Cannot be NULL");
-      PhylogenyTreeBasic ::FindDescendentsOfNodeWithin(pnSTRootCurr, setSTRoots,
-                                                       setSTToRemove);
-
-      // cout << "******** pnSTRootCurr: ";
-      // pnSTRootCurr->Dump();
-
-      // if this is a single node or degree of this node is two, just output it
-      if (setSTRoots.find(pnSTRootCurr) != setSTRoots.end() ||
-          pnSTRootCurr->GetChildrenNum() == 2) {
-        // cout << "******** outputing subtree rooted at: ";
-        // pnSTRootCurr->Dump();
-
-        ptreeNew->SetRootPlain(pnSTRootCurr);
-        // if the tree has at least one intermediate node, output it
-        // if( ptreeNew->GetNumInternalNodes() >= 2 )
-        {
-          string nwTree;
-          ptreeNew->ConsNewick(nwTree);
-          outfile << nwTree << endl;
-          fTreeOut = true;
-        }
-      } else {
-        YW_ASSERT_INFO(pnSTRootCurr->GetChildrenNum() >= 3,
-                       "Must be a mulfurcating node");
-        // now enumerate all pairs of children of this node
-        TreeNode *pnRootNew = new TreeNode;
-        ptreeNew->SetRootPlain(pnRootNew);
-        vector<TreeNode *> listChildren;
-        set<TreeNode *> listChildrenSet;
-        pnSTRootCurr->GetAllChildren(listChildrenSet);
-        PopulateVecBySetGen(listChildren, listChildrenSet);
-        vector<int> posvec;
-        GetFirstCombo(2, (int)listChildren.size(), posvec);
-        while (true) {
-          vector<int> vecdummy;
-          pnRootNew->AddChild(listChildren[posvec[0]], vecdummy);
-          pnRootNew->AddChild(listChildren[posvec[1]], vecdummy);
-          // if( ptreeNew->GetNumInternalNodes() >= 2 )
-          {
-            string nwTree;
-            ptreeNew->ConsNewick(nwTree);
-            outfile << nwTree << endl;
-            fTreeOut = true;
-          }
-          pnRootNew->DetachAllChildren();
-          listChildren[posvec[0]]->SetParent(pnSTRootCurr);
-          listChildren[posvec[1]]->SetParent(pnSTRootCurr);
-          if (GetNextCombo(2, (int)listChildren.size(), posvec) == false) {
-            break;
-          }
-        }
-        //
-        delete pnRootNew;
-      }
+            // rule: if there is a non-uniform subtree, output it
+            for (set<TreeNode *>::iterator itg = setSTRoots.begin(); itg != setSTRoots.end(); ++itg)
+            {
+                //
+                if (setSTUniform.find(*itg) == setSTUniform.end())
+                {
+                    //
+                    pnSTRootCurr = *itg;
+                    break;
+                }
+            }
+            if (pnSTRootCurr == NULL)
+            {
+                // if no non-uniform subtrees are found, find a sibling pairs of subtrees and take the whole subtree to output
+                // YW: need to be careful; I don't want to have left-over
+                set<TreeNode *> ppSibs;
+                bool fres = PhylogenyTreeBasic ::GetSiblingsNodesFrom(setSTRoots, ppSibs);
+                YW_ASSERT_INFO(fres == true, "Fail to find silblings");
+                pnSTRootCurr = (*ppSibs.begin())->GetParent();
+                while (true)
+                {
+                    // find out how many subtrees covered if taking this
+                    set<TreeNode *> setSTCoveredStep;
+                    PhylogenyTreeBasic ::FindDescendentsOfNodeWithin(pnSTRootCurr, setSTRoots, setSTCoveredStep);
+                    // if there are at least two left, use it or we have reached the root
+                    if ((int)setSTCoveredStep.size() + 1 < (int)setSTRoots.size() || pnSTRootCurr == ptreeWork->GetRoot())
+                    {
+                        //
+                        break;
+                    }
+                    else
+                    {
+                        // move up
+                        pnSTRootCurr = pnSTRootCurr->GetParent();
+                    }
+                }
+            }
+            else
+            {
+                // if there are only one leftover and it is uniform one, output all the tree
+                if (setSTRoots.size() == 2 && setSTUniform.size() > 0)
+                {
+                    //
+                    pnSTRootCurr = ptreeWork->GetRoot();
+                }
+            }
 
-      // now detach this node from the rest of tree
-      TreeNode *pnparcurr = pnSTRootCurr->GetParent();
-      pnSTRootCurr->DetachSelf();
+            // remove any subtrees that are descendent of the output subtree
+            // just output it
+            YW_ASSERT_INFO(pnSTRootCurr != NULL, "Cannot be NULL");
+            PhylogenyTreeBasic ::FindDescendentsOfNodeWithin(pnSTRootCurr, setSTRoots, setSTToRemove);
 
-      if (pnSTRootCurr == ptreeWork->GetRoot()) {
-        break;
-      }
+            //cout << "******** pnSTRootCurr: ";
+            //pnSTRootCurr->Dump();
 
-      delete pnSTRootCurr;
-      pnSTRootCurr = NULL;
+            // if this is a single node or degree of this node is two, just output it
+            if (setSTRoots.find(pnSTRootCurr) != setSTRoots.end() || pnSTRootCurr->GetChildrenNum() == 2)
+            {
+                //cout << "******** outputing subtree rooted at: ";
+                //pnSTRootCurr->Dump();
+
+                ptreeNew->SetRootPlain(pnSTRootCurr);
+                // if the tree has at least one intermediate node, output it
+                //if( ptreeNew->GetNumInternalNodes() >= 2 )
+                {
+                    string nwTree;
+                    ptreeNew->ConsNewick(nwTree);
+                    outfile << nwTree << endl;
+                    fTreeOut = true;
+                }
+            }
+            else
+            {
+                YW_ASSERT_INFO(pnSTRootCurr->GetChildrenNum() >= 3, "Must be a mulfurcating node");
+                // now enumerate all pairs of children of this node
+                TreeNode *pnRootNew = new TreeNode;
+                ptreeNew->SetRootPlain(pnRootNew);
+                vector<TreeNode *> listChildren;
+                set<TreeNode *> listChildrenSet;
+                pnSTRootCurr->GetAllChildren(listChildrenSet);
+                PopulateVecBySetGen(listChildren, listChildrenSet);
+                vector<int> posvec;
+                GetFirstCombo(2, (int)listChildren.size(), posvec);
+                while (true)
+                {
+                    vector<int> vecdummy;
+                    pnRootNew->AddChild(listChildren[posvec[0]], vecdummy);
+                    pnRootNew->AddChild(listChildren[posvec[1]], vecdummy);
+                    //if( ptreeNew->GetNumInternalNodes() >= 2 )
+                    {
+                        string nwTree;
+                        ptreeNew->ConsNewick(nwTree);
+                        outfile << nwTree << endl;
+                        fTreeOut = true;
+                    }
+                    pnRootNew->DetachAllChildren();
+                    listChildren[posvec[0]]->SetParent(pnSTRootCurr);
+                    listChildren[posvec[1]]->SetParent(pnSTRootCurr);
+                    if (GetNextCombo(2, (int)listChildren.size(), posvec) == false)
+                    {
+                        break;
+                    }
+                }
+                //
+                delete pnRootNew;
+            }
 
-      // cout << "Before degree-one cleainup, tree is: ";
-      // ptreeWork->Dump();
-      // exit(1);
+            // now detach this node from the rest of tree
+            TreeNode *pnparcurr = pnSTRootCurr->GetParent();
+            pnSTRootCurr->DetachSelf();
 
-      // if( pnparcurr != NULL && pnparcurr != ptreeWork->GetRoot() )
-      if (pnparcurr != NULL) {
-        ptreeWork->RemoveDegreeOneNodeAt(pnparcurr);
-      }
-      for (set<TreeNode *>::iterator it = setSTToRemove.begin();
-           it != setSTToRemove.end(); ++it) {
-        setSTRoots.erase(*it);
-      }
-    }
-    ptreeNew->SetRootPlain(NULL);
-    // delete ptreeNew;
-    // cout << "output tree deleted\n";
+            if (pnSTRootCurr == ptreeWork->GetRoot())
+            {
+                break;
+            }
+
+            delete pnSTRootCurr;
+            pnSTRootCurr = NULL;
+
+            //cout << "Before degree-one cleainup, tree is: ";
+            //ptreeWork->Dump();
+            //exit(1);
+
+            //if( pnparcurr != NULL && pnparcurr != ptreeWork->GetRoot() )
+            if (pnparcurr != NULL)
+            {
+                ptreeWork->RemoveDegreeOneNodeAt(pnparcurr);
+            }
+            for (set<TreeNode *>::iterator it = setSTToRemove.begin(); it != setSTToRemove.end(); ++it)
+            {
+                setSTRoots.erase(*it);
+            }
+        }
+        ptreeNew->SetRootPlain(NULL);
+        //delete ptreeNew;
+        //cout << "output tree deleted\n";
 
 #if 0
         // create a psuedo tree just for outputing
@@ -610,133 +642,136 @@ cout << endl;
 cout << "output tree deleted\n";
 #endif
 
-    delete ptreeWork;
-    // cout << "Shrunk tree deleted.\n";
-  }
-
-  outfile.close();
-  // cout << "Tree outoupt finished.\n";
-  YW_ASSERT_INFO(fTreeOut == true,
-                 "ERROR: no subtrees output. Your trees appear to be either "
-                 "very clustered into uniform subtrees or the parameters (size "
-                 "of subtree and identical trees size upper bounds are wrong.");
-}
-
-PhylogenyTreeBasic *ConsPhyTreeShrinkIdentSubtrees(PhylogenyTreeBasic *ptreeIn2,
-                                                   int maxIdentSubtreeSz,
-                                                   bool fIdConsecutive) {
-  // create a new tree
-  PhylogenyTreeBasic *ptreeRes = new PhylogenyTreeBasic;
-  // construct according to Newick format
-  string strNW;
-  ptreeIn2->ConsNewick(strNW, false, 1.0, true);
-  ptreeRes->ConsOnNewick(strNW);
-
-  // cout << "ConsPhyTreeShrinkIdentSubtrees: tree in: " << strNW << endl;
-
-  // create a tree with identical subtree that is no greater than the given size
-  // (i.e. if a subtree is of the same label, shrink it if needed) first obtain
-  // the max identity subtrees
-  set<TreeNode *> setSTRootsIdents;
-  ptreeRes->GetMaxSubtrees(setSTRootsIdents);
-  // cout << "Number of maximiaml subtrees: " << setSTRootsIdents.size() <<
-  // endl;
-
-  // find all the leaves
-  vector<set<TreeNode *> > listMaxSubtreesLeaves;
-  for (set<TreeNode *>::iterator it = setSTRootsIdents.begin();
-       it != setSTRootsIdents.end(); ++it) {
-    set<TreeNode *> setLeavesUnder;
-    (*it)->GetAllLeavesUnder(setLeavesUnder);
-    listMaxSubtreesLeaves.push_back(setLeavesUnder);
-    // cout << "Sz of subtree found: " << setLeavesUnder.size() << endl;
-  }
-
-  // now remove nodes until the subtrees is no longer too large
-  for (int i = 0; i < (int)listMaxSubtreesLeaves.size(); ++i) {
+        delete ptreeWork;
+        //cout << "Shrunk tree deleted.\n";
+    }
+
+    outfile.close();
+    //cout << "Tree outoupt finished.\n";
+    YW_ASSERT_INFO(fTreeOut == true, "ERROR: no subtrees output. Your trees appear to be either very clustered into uniform subtrees or the parameters (size of subtree and identical trees size upper bounds are wrong.");
+}
+
+PhylogenyTreeBasic *ConsPhyTreeShrinkIdentSubtrees(PhylogenyTreeBasic *ptreeIn2, int maxIdentSubtreeSz, bool fIdConsecutive)
+{
+    // Build and return a new heap-allocated tree (the caller presumably owns and deletes it): a copy of ptreeIn2 with oversized identical subtrees trimmed
+    PhylogenyTreeBasic *ptreeRes = new PhylogenyTreeBasic;
+    // copy the input by writing it out as a Newick string and constructing the new tree from that string (NOTE(review): the meaning of the false/1.0/true arguments is not visible here)
+    string strNW;
+    ptreeIn2->ConsNewick(strNW, false, 1.0, true);
+    ptreeRes->ConsOnNewick(strNW);
+
+    //cout << "ConsPhyTreeShrinkIdentSubtrees: tree in: " << strNW << endl;
+
+    // goal: no subtree whose leaves all carry the same label may keep more than maxIdentSubtreeSz leaves (larger ones are shrunk below)
+    // first obtain the roots of the maximal identical subtrees (as reported by GetMaxSubtrees)
+    set<TreeNode *> setSTRootsIdents;
+    ptreeRes->GetMaxSubtrees(setSTRootsIdents);
+    //cout << "Number of maximiaml subtrees: " << setSTRootsIdents.size() << endl;
+
+    // collect, for each of those roots, the set of leaves underneath it
+    vector<set<TreeNode *>> listMaxSubtreesLeaves;
+    for (set<TreeNode *>::iterator it = setSTRootsIdents.begin(); it != setSTRootsIdents.end(); ++it)
+    {
+        set<TreeNode *> setLeavesUnder;
+        (*it)->GetAllLeavesUnder(setLeavesUnder);
+        listMaxSubtreesLeaves.push_back(setLeavesUnder);
+        //cout << "Sz of subtree found: " << setLeavesUnder.size() << endl;
+    }
+
+    // now trim every subtree that has more than maxIdentSubtreeSz leaves
+    for (int i = 0; i < (int)listMaxSubtreesLeaves.size(); ++i)
+    {
+        // subtrees already within the limit are left untouched
+        if ((int)listMaxSubtreesLeaves[i].size() > maxIdentSubtreeSz)
+        {
+            vector<TreeNode *> listNodes;
+            PopulateVecBySetGen(listNodes, listMaxSubtreesLeaves[i]);
+
+            // keep the first maxIdentSubtreeSz entries of listNodes and remove every later one; which leaves survive depends on the order PopulateVecBySetGen produces
+            for (int j = maxIdentSubtreeSz; j < (int)listMaxSubtreesLeaves[i].size(); ++j)
+            {
+                ptreeRes->RemoveNodeKeepChildren(listNodes[j]);
+
+                //cout << "After removing a leaf: current tree: ";
+                //string tr;
+                //ptreeRes->ConsNewick(tr);
+                //cout << tr << endl;
+            }
+        }
+    }
+
+    //cout << "ConsPhyTreeShrinkIdentSubtrees: resulting tree: ";
+    //string tr;
+    //ptreeRes->ConsNewick(tr);
+    //cout << tr << endl;
+
+    // optionally renumber the nodes of the result via AssignConsecutiveIdsForTree
+    if (fIdConsecutive == true)
+    {
+        AssignConsecutiveIdsForTree(*ptreeRes);
+    }
+
+    return ptreeRes;
+}
+
+void ChangebackLeafLabelForTreeWithZeroBaseId(PhylogenyTreeBasic *ptree, TaxaMapper *pTMapper)
+{
+    //cout << "Before ChangebackLeafLabelForTreeWithZeroBaseId: ";
+    //ptree->Dump();
     //
-    if ((int)listMaxSubtreesLeaves[i].size() > maxIdentSubtreeSz) {
-      vector<TreeNode *> listNodes;
-      PopulateVecBySetGen(listNodes, listMaxSubtreesLeaves[i]);
-
-      // remove some leaves
-      for (int j = maxIdentSubtreeSz; j < (int)listMaxSubtreesLeaves[i].size();
-           ++j) {
-        ptreeRes->RemoveNodeKeepChildren(listNodes[j]);
-
-        // cout << "After removing a leaf: current tree: ";
-        // string tr;
-        // ptreeRes->ConsNewick(tr);
-        // cout << tr << endl;
-      }
-    }
-  }
-
-  // cout << "ConsPhyTreeShrinkIdentSubtrees: resulting tree: ";
-  // string tr;
-  // ptreeRes->ConsNewick(tr);
-  // cout << tr << endl;
-
-  // YW: set consecutive id?
-  if (fIdConsecutive == true) {
-    AssignConsecutiveIdsForTree(*ptreeRes);
-  }
-
-  return ptreeRes;
-}
-
-void ChangebackLeafLabelForTreeWithZeroBaseId(PhylogenyTreeBasic *ptree,
-                                              TaxaMapper *pTMapper) {
-  // cout << "Before ChangebackLeafLabelForTreeWithZeroBaseId: ";
-  // ptree->Dump();
-  //
-  YW_ASSERT_INFO(pTMapper != NULL, "Must have a mapper");
-  vector<TreeNode *> listLeafNodes;
-  ptree->GetAllLeafNodes(listLeafNodes);
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    // get the int id
-    int lbl = listLeafNodes[i]->GetIntLabel();
-    string lblOrig = pTMapper->GetString(lbl);
-    // cout << "lbl:" << lbl << ", lblOrig: " << lblOrig << endl;;
-    listLeafNodes[i]->SetLabel(lblOrig);
-  }
-  // cout << "After ChangebackLeafLabelForTreeWithZeroBaseId: ";
-  // ptree->Dump();
-}
-
-bool ConvPhyloTreesToZeroBasedId(vector<PhylogenyTreeBasic *> &treePtrList,
-                                 TaxaMapper *pTMapper) {
-  // the given trees are not zero-based; so convert them to be; pTMMapeer: not
-  // initialied upon entry; then store the mapping between id to string
-  for (int i = 0; i < (int)treePtrList.size(); ++i) {
+    YW_ASSERT_INFO(pTMapper != NULL, "Must have a mapper");
     vector<TreeNode *> listLeafNodes;
-    treePtrList[i]->GetAllLeafNodes(listLeafNodes);
-    if (pTMapper->IsInitialized() == false) {
-      //
-      for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-        // get the int id
-        string lbl = listLeafNodes[i]->GetLabel();
-        int idTouse = pTMapper->AddTaxaString(lbl);
-        // cout << "lbl:" << lbl << ", lblOrig: " << lblOrig << endl;;
-        listLeafNodes[i]->SetIntLabel(idTouse);
-      }
-    } else {
-      //
-      for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
+    ptree->GetAllLeafNodes(listLeafNodes);
+    for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+    {
         // get the int id
         int lbl = listLeafNodes[i]->GetIntLabel();
         string lblOrig = pTMapper->GetString(lbl);
-        // cout << "lbl:" << lbl << ", lblOrig: " << lblOrig << endl;;
+        //cout << "lbl:" << lbl << ", lblOrig: " << lblOrig << endl;;
         listLeafNodes[i]->SetLabel(lblOrig);
-      }
     }
-  }
-  return true; // for now, just true
+    //cout << "After ChangebackLeafLabelForTreeWithZeroBaseId: ";
+    //ptree->Dump();
+}
+
+bool ConvPhyloTreesToZeroBasedId(vector<PhylogenyTreeBasic *> &treePtrList, TaxaMapper *pTMapper)
+{
+    // Relabel the leaves of every tree in treePtrList through pTMapper. Original intent: the given trees are not zero-based on entry, so convert them;
+    // pTMapper is expected to be uninitialized upon entry and ends up storing the id <-> string mapping. NOTE(review): pTMapper is dereferenced without a NULL check
+    for (int i = 0; i < (int)treePtrList.size(); ++i)
+    {
+        vector<TreeNode *> listLeafNodes;
+        treePtrList[i]->GetAllLeafNodes(listLeafNodes);
+        if (pTMapper->IsInitialized() == false)
+        {
+            // mapper not initialized: register each leaf's string label with the mapper (the inner i below shadows the tree index i)
+            for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+            {
+                // AddTaxaString returns the id to use for this label; it becomes the leaf's int label
+                string lbl = listLeafNodes[i]->GetLabel();
+                int idTouse = pTMapper->AddTaxaString(lbl);
+                //cout << "lbl:" << lbl << ", lblOrig: " << lblOrig << endl;;
+                listLeafNodes[i]->SetIntLabel(idTouse);
+            }
+        }
+        else
+        {
+            // mapper already initialized: set each leaf's label to the string stored for its int label. NOTE(review): this is the reverse direction of the branch above - confirm it is intended
+            for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+            {
+                // look up the string the mapper holds for the leaf's current int label
+                int lbl = listLeafNodes[i]->GetIntLabel();
+                string lblOrig = pTMapper->GetString(lbl);
+                //cout << "lbl:" << lbl << ", lblOrig: " << lblOrig << endl;;
+                listLeafNodes[i]->SetLabel(lblOrig);
+            }
+        }
+    }
+    return true; // no failure path exists yet, so the result is always true
 }
 
-void ChangeLeafIntLabelOfTree(PhylogenyTreeBasic &treeToChange,
-                              const map<int, int> &mapOldIntLblToNewIntLbl,
-                              bool fSetUserLblToo) {
+void ChangeLeafIntLabelOfTree(PhylogenyTreeBasic &treeToChange, const map<int, int> &mapOldIntLblToNewIntLbl, bool fSetUserLblToo)
+{
 #if 0
 cout << "Before ChangeLeafIntLabelOfTree: ";
 treeToChange.Dump();
@@ -747,36 +782,37 @@ cout << "[" << it->first << "," << it->second << "] ";
 }
 cout << endl;
 #endif
-  //
-  vector<TreeNode *> listLeafNodes;
-  treeToChange.GetAllLeafNodes(listLeafNodes);
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    // get the int id
-    int lbl = listLeafNodes[i]->GetIntLabel();
-
-    if (mapOldIntLblToNewIntLbl.find(lbl) == mapOldIntLblToNewIntLbl.end()) {
-      treeToChange.Dump();
-      cout << "lbl: " << lbl << endl;
-      cout << "mapOldIntLblToNewIntLbl: ";
-      for (map<int, int>::const_iterator it = mapOldIntLblToNewIntLbl.begin();
-           it != mapOldIntLblToNewIntLbl.end(); ++it) {
-        cout << "[" << it->first << ", " << it->second << "]   ";
-      }
-      cout << endl;
-    }
-
-    YW_ASSERT_INFO(mapOldIntLblToNewIntLbl.find(lbl) !=
-                       mapOldIntLblToNewIntLbl.end(),
-                   "Fail to find the orignal label");
-    int lblIntNew = (*(mapOldIntLblToNewIntLbl.find(lbl))).second;
-    // cout << "lbl:" << lbl << ", lblIntNew: " << lblIntNew << endl;;
-    listLeafNodes[i]->SetIntLabel(lblIntNew);
-    if (fSetUserLblToo) {
-      char buf[100];
-      sprintf(buf, "%d", lblIntNew);
-      string strbuf(buf);
-      listLeafNodes[i]->SetUserLabel(strbuf);
-    }
+    //
+    vector<TreeNode *> listLeafNodes;
+    treeToChange.GetAllLeafNodes(listLeafNodes);
+    for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+    {
+        // get the int id
+        int lbl = listLeafNodes[i]->GetIntLabel();
+
+        if (mapOldIntLblToNewIntLbl.find(lbl) == mapOldIntLblToNewIntLbl.end())
+        {
+            treeToChange.Dump();
+            cout << "lbl: " << lbl << endl;
+            cout << "mapOldIntLblToNewIntLbl: ";
+            for (map<int, int>::const_iterator it = mapOldIntLblToNewIntLbl.begin(); it != mapOldIntLblToNewIntLbl.end(); ++it)
+            {
+                cout << "[" << it->first << ", " << it->second << "]   ";
+            }
+            cout << endl;
+        }
+
+        YW_ASSERT_INFO(mapOldIntLblToNewIntLbl.find(lbl) != mapOldIntLblToNewIntLbl.end(), "Fail to find the orignal label");
+        int lblIntNew = (*(mapOldIntLblToNewIntLbl.find(lbl))).second;
+        //cout << "lbl:" << lbl << ", lblIntNew: " << lblIntNew << endl;;
+        listLeafNodes[i]->SetIntLabel(lblIntNew);
+        if (fSetUserLblToo)
+        {
+            char buf[100];
+            sprintf(buf, "%d", lblIntNew);
+            string strbuf(buf);
+            listLeafNodes[i]->SetUserLabel(strbuf);
+        }
 #if 0
 // for now, also set user label as well
 char buf[100];
@@ -784,3970 +820,4380 @@ sprintf(buf, "%d",lblIntNew);
 string strbuf(buf);
 listLeafNodes[i]->SetUserLabel(strbuf);
 #endif
-  }
+    }
 #if 0
 cout << "After ChangeLeafIntLabelOfTree: ";
 treeToChange.Dump();
 #endif
 }
 
-void AssignConsecutiveIdsForTree(PhylogenyTreeBasic &treeToChange) {
-  //
-  vector<TreeNode *> listAllNodes;
-  treeToChange.GetAllNodes(listAllNodes);
-  int idToUse = 0;
-  for (int i = 0; i < (int)listAllNodes.size(); ++i) {
-    // leaves assigned to a distinct id first
-    if (listAllNodes[i]->IsLeaf() == true) {
-      listAllNodes[i]->SetID(idToUse++);
-    }
-  }
-  for (int i = 0; i < (int)listAllNodes.size(); ++i) {
-    // leaves assigned to a distinct id first
-    if (listAllNodes[i]->IsLeaf() == false) {
-      listAllNodes[i]->SetID(idToUse++);
-    }
-  }
-}
-
-void RandTrimLeavesFromTree(PhylogenyTreeBasic *ptreeToTrim,
-                            int numLeavesRemain) {
-  // do nothing if the gene trees are small
-  if (ptreeToTrim->GetNumLeaves() <= numLeavesRemain) {
-    return;
-  }
-
-  // cout << "RandTrimLeavesFromTree: before trimming: tree is: ";
-  // string strNW;
-  // ptreeToTrim->ConsNewick( strNW, false, 1.0, true );
-  // cout << strNW << endl;
-
-  // for a large tree, we want to randomly trim some leaves to make the tree
-  // smaller rule: never completely delete some leaf label; prefer to deleting
-  // leaves that appear more frequently
-  map<int, set<TreeNode *> > mapLeafLblToNodes;
-  vector<TreeNode *> listLeafNodes;
-  ptreeToTrim->GetAllLeafNodes(listLeafNodes);
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    int lbl = listLeafNodes[i]->GetIntLabel();
-    if (mapLeafLblToNodes.find(lbl) == mapLeafLblToNodes.end()) {
-      set<TreeNode *> ss;
-      mapLeafLblToNodes.insert(map<int, set<TreeNode *> >::value_type(lbl, ss));
-    }
-    mapLeafLblToNodes[lbl].insert(listLeafNodes[i]);
-  }
-  // create a list of nodes to remove
-  vector<set<TreeNode *> > listNodesToRemove;
-  vector<double> listNodesToRemoveSz;
-  for (map<int, set<TreeNode *> >::iterator it = mapLeafLblToNodes.begin();
-       it != mapLeafLblToNodes.end(); ++it) {
+void AssignConsecutiveIdsForTree(PhylogenyTreeBasic &treeToChange)
+{
     //
-    listNodesToRemove.push_back(it->second);
-    listNodesToRemoveSz.push_back(it->second.size());
-  }
-  // now start removing
-  int numLeavesCurr = ptreeToTrim->GetNumLeaves();
-  while (numLeavesCurr > numLeavesRemain) {
-    int indexChosen = GetWeightedRandItemIndex(listNodesToRemoveSz);
-
-    if (listNodesToRemoveSz[indexChosen] < 1.01) {
-      // cannot delete the one with only one copy left
-      continue;
-    }
-    YW_ASSERT_INFO(listNodesToRemove[indexChosen].size() >= 2, "Wrong");
-    TreeNode *pnToRm = *(listNodesToRemove[indexChosen].begin());
-    listNodesToRemove[indexChosen].erase(pnToRm);
-    --numLeavesCurr;
-    TreeNode *pnPar = pnToRm->GetParent();
-    ptreeToTrim->RemoveNode(pnToRm);
-    ptreeToTrim->RemoveDegreeOneNodeAt(pnPar);
-    listNodesToRemoveSz[indexChosen] -= 1.0;
-  }
-  AssignConsecutiveIdsForTree(*ptreeToTrim);
-  // cout << "RandTrimLeavesFromTree: After trimming: tree is: ";
-  // string strNW2;
-  // ptreeToTrim->ConsNewick( strNW2, false, 1.0, true );
-  // cout << strNW2 << endl;
-}
-
-// ***************************************************************************
-void NewickUtils ::RetrieveLabelSet(const string &strNW,
-                                    multiset<string> &setLabels) {
-  // cout << "RetrieveLabelSet: strNW = " << strNW << endl;
-  //
-  setLabels.clear();
-
-  string strIdDirect = strNW;
-  int curpos = 0;
-  int lastposOut = 0;
-  // char *strIdBuf = (char *)strIdDirect.c_str();
-  while (curpos < (int)strNW.length()) {
-    // cout << "curpos = " << curpos << endl;
-    bool fIdentifier = false;
-    if ((strNW[curpos] == '(' || strNW[curpos] == ',') &&
-        (curpos == (int)strNW.length() - 1 || strNW[curpos + 1] != '(')) {
-      fIdentifier = true;
-    }
-    // cout << "Adding it: " << strId[curpos] << endl;
-    lastposOut++;
-    curpos++;
-
-    // should we search for id
-    if (fIdentifier == true) {
-      // cout << "Now searching for identifier\n";
-      // now scan to the right to find the position to read the identifier
-      while (curpos < (int)strNW.length()) {
-        if (strNW[curpos] != ')' && strNW[curpos] != ':' &&
-            strNW[curpos] != ',') {
-          curpos++;
-        } else {
-          break;
-        }
-      }
-      //
-      // curpos--;
-      string strFoundId;
-      // cout << "lastposOut = " << lastposOut << ", curpos = " << curpos <<
-      // endl;
-      strFoundId = strNW.substr(lastposOut, curpos - lastposOut);
-      setLabels.insert(strFoundId);
-      lastposOut = curpos;
-      // cout << "One identifier found: " << strFoundId << endl;
-    }
-  }
-}
-
-bool NewickUtils ::FindSplitIn(const string &strNW, string &strPart1,
-                               string &strPart2) {
-  // break up the NW into two parts by the center ,
-  // return false if atomic
-  int posSplit = -1;
-  int level = 0;
-  for (int i = 0; i < (int)strNW.length(); ++i) {
-    if (strNW[i] == '(') {
-      level++;
-    } else if (strNW[i] == ')') {
-      level--;
-    } else if (strNW[i] == ',') {
-      if (level == 1) {
-        posSplit = i;
-        break;
-      }
-    }
-  }
-
-  if (posSplit < 0) {
-    return false;
-  }
-  //
-  int posLeft = strNW.find('(');
-  int posRight = strNW.rfind(')');
-  strPart1 = strNW.substr(posLeft + 1, posSplit - posLeft - 1);
-  strPart2 = strNW.substr(posSplit + 1, posRight - posSplit - 1);
-
-  return true;
-}
-
-void NewickUtils ::UpdateLabells(string &strNW,
-                                 const map<string, string> &mapOldLabelToNew) {
-  // change the taxa name in the old newick format to the new ones as recorded
-  // in the map
-  string strNWNew;
-  string strIdDirect = strNW;
-  int curpos = 0;
-  int lastposOut = 0;
-  map<string, string> &mapOldLabelToNewRef =
-      const_cast<map<string, string> &>(mapOldLabelToNew);
-  // bool fOutputCurChar = true;
-  // char *strIdBuf = (char *)strIdDirect.c_str();
-  while (curpos < (int)strNW.length()) {
-    // cout << "curpos = " << curpos << endl;
-    bool fIdentifier = false;
-    if ((strNW[curpos] == '(' || strNW[curpos] == ',') &&
-        (curpos == (int)strNW.length() - 1 || strNW[curpos + 1] != '(')) {
-      fIdentifier = true;
-    }
-
-    // add it always since this is deliminator
-    strNWNew += strNW[curpos];
-
-    // cout << "Adding it: " << strId[curpos] << endl;
-    lastposOut++;
-    curpos++;
-
-    // should we search for id
-    if (fIdentifier == true) {
-      // cout << "Now searching for identifier\n";
-      // now scan to the right to find the position to read the identifier
-      while (curpos < (int)strNW.length()) {
-        if (strNW[curpos] != ')' && strNW[curpos] != ':' &&
-            strNW[curpos] != ',') {
-          curpos++;
-        } else {
-          break;
-        }
-      }
-      //
-      // curpos--;
-      string strFoundId;
-      // cout << "lastposOut = " << lastposOut << ", curpos = " << curpos <<
-      // endl;
-      strFoundId = strNW.substr(lastposOut, curpos - lastposOut);
-
-      //
-      YW_ASSERT_INFO(mapOldLabelToNew.find(strFoundId) !=
-                         mapOldLabelToNew.end(),
-                     "Fail to find the id in the map");
-      strNWNew.append(mapOldLabelToNewRef[strFoundId]);
-
-      lastposOut = curpos;
-      // cout << "One identifier found: " << strFoundId << endl;
-
-      // now move back by one letter
-      //--curpos;
-    }
-  }
-
-  // cout << "UpdateLabells: before update, newick = " << strNW << ", after
-  // update: " << strNWNew << endl;
-  strNW = strNWNew;
-}
-
-string NewickUtils ::RemoveBrLenFromTree(string &strNW) {
-  //
-  int curpos = 0;
-  bool fSkip = false;
-  string strNWNew;
-  // char *strIdBuf = (char *)strIdDirect.c_str();
-  while (curpos < (int)strNW.length()) {
-    // cout << "curpos = " << curpos << endl;
-    if (strNW[curpos] == ':') {
-      fSkip = true;
-    } else if (strNW[curpos] == ',' || strNW[curpos] == ')' ||
-               strNW[curpos] == ';') {
-      // continue skipping until reaching the separate: , or )
-      fSkip = false;
-    }
-
-    if (fSkip == false) {
-      strNWNew += strNW[curpos];
-    }
-
-    curpos++;
-  }
-  return strNWNew;
-}
-
-void NewickUtils ::ConsolidateSinglChildChain(string &strNW) {
-  if (strNW[0] != '(') {
-    // nothing needs to be done
-    return;
-  }
-
-  // cout << "conslidate: " << strNW << endl;
-  // sometime there may be a nested chain of enclosed parenthesis
-  // consolidate these; and maintain the proper branch length if there are
-  string strRes = strNW;
-  double lenTot = 0.0;
-  bool fLen = false;
-  // bool fParenthRemoved = false;
-
-  while (true) {
-    // cout << "current string: " << strRes << endl;
-    // stop if it become automic
-    string str1, str2;
-    bool fNonAtom = FindSplitIn(strRes, str1, str2);
-
-    // if( fNonAtom == false )
-    //{
-    // fParenthRemoved = true;
-
-    //
-    YW_ASSERT_INFO(strRes[0] == '(', "wrong");
-    int posRight = strRes.rfind(')');
-    YW_ASSERT_INFO(posRight > 0, "wrong1");
-    // cout << "posRight: " << posRight << endl;
-    if (posRight != (int)strRes.length() - 1) {
-      int posLen = strRes.find(':', posRight);
-      // cout << "posLen: " << posLen << endl;
-      if (posLen > 0) {
-        // if( lenTot > 0.0)
-        //{
-        // cout << "*HHHHH\n";
-        //}
-        fLen = true;
-        lenTot += GetLenAt(strRes, posLen + 1);
-        // cout << "lenTot: " << lenTot << endl;
-      }
+    vector<TreeNode *> listAllNodes;
+    treeToChange.GetAllNodes(listAllNodes);
+    int idToUse = 0;
+    for (int i = 0; i < (int)listAllNodes.size(); ++i)
+    {
+        // leaves assigned to a distinct id first
+        if (listAllNodes[i]->IsLeaf() == true)
+        {
+            listAllNodes[i]->SetID(idToUse++);
+        }
+    }
+    for (int i = 0; i < (int)listAllNodes.size(); ++i)
+    {
+        // internal (non-leaf) nodes get the remaining ids, after all leaves
+        if (listAllNodes[i]->IsLeaf() == false)
+        {
+            listAllNodes[i]->SetID(idToUse++);
+        }
     }
-    //}
-
-    int len = posRight - 1;
-    strRes = strRes.substr(1, len);
-
-    if (fNonAtom == true) {
-      break;
-    }
-  }
-  string strRes1;
-  // if( fParenthRemoved )
-  //{
-  strRes1 += "(";
-  //}
-  strRes1 += strRes;
-  // if( fParenthRemoved )
-  //{
-  strRes1 += ")";
-  //}
-  if (fLen) {
-    strRes1 += ":" + std::to_string(lenTot);
-  }
-
-  strNW = strRes1;
-  // cout << "conslidate to " << strNW << endl;
-}
-
-double NewickUtils ::GetLenAt(const string &strNW, int posLen) {
-  //
-  int posLenEnd = strNW.length() - 1;
-  int sepPos1 = strNW.find(',', posLen);
-  int sepPos2 = strNW.find(')', posLen);
-  if (sepPos1 > 0 && sepPos1 - 1 < posLenEnd) {
-    posLenEnd = sepPos1 - 1;
-  }
-  if (sepPos2 > 0 && sepPos2 - 1 < posLenEnd) {
-    posLenEnd = sepPos2 - 1;
-  }
-  if (posLenEnd <= posLen) {
-    cout << "posLen: " << posLen << ", posLenEnd: " << posLenEnd
-         << ", tree: " << strNW << endl;
-  }
-  YW_ASSERT_INFO(posLenEnd >= posLen, "No length found");
-  string lenstr = strNW.substr(posLen, posLenEnd - posLen + 1);
-  return atof(lenstr.c_str());
 }
 
-// ***************************************************************************
+void RandTrimLeavesFromTree(PhylogenyTreeBasic *ptreeToTrim, int numLeavesRemain)
+{
+    // do nothing if the tree already has no more than numLeavesRemain leaves
+    if (ptreeToTrim->GetNumLeaves() <= numLeavesRemain)
+    {
+        return;
+    }
 
-TaxaMapper ::TaxaMapper() {
-  curId = 0;
-  fInit = false;
-}
+    //cout << "RandTrimLeavesFromTree: before trimming: tree is: ";
+    //string strNW;
+    //ptreeToTrim->ConsNewick( strNW, false, 1.0, true );
+    //cout << strNW << endl;
 
-// utility
-bool TaxaMapper ::IsEmpty() { return mapStrToId.size() == 0; }
-
-int TaxaMapper ::AddTaxaString(const string &str) {
-  // cout << "AddTaxaString : curId = " << curId  << " for new taxa string " <<
-  // str << endl;
-  if (mapStrToId.find(str) == mapStrToId.end()) {
-    mapStrToId.insert(map<string, int>::value_type(str, curId));
-    mapIdToStr.insert(map<int, string>::value_type(curId, str));
-    curId++;
-  }
-  // else
-  //{
-  return mapStrToId[str];
-  //}
-}
-
-void TaxaMapper ::AddTaxaStringWithId(int tid, const string &str) {
-  // caution: don't mix up with the previous auto-id mode
-  mapStrToId.insert(map<string, int>::value_type(str, tid));
-  mapIdToStr.insert(map<int, string>::value_type(tid, str));
-}
-
-int TaxaMapper ::GetId(const string &str) {
-  // cout << "Num of entries in str mapper : " << mapStrToId.size() << endl;
-  // for( map<string,int> :: iterator it =mapStrToId.begin(); it !=
-  // mapStrToId.end(); ++it )
-  //{
-  // cout << it->first << ", " << it->second << endl;
-  //}
-
-  if (mapStrToId.find(str) == mapStrToId.end()) {
-    // when the str is not pre-recorded, return negative value
-    return -1;
-    // cout << "This taxa: " << str << " seems to be wrong\n";
-    // YW_ASSERT_INFO( false, "Fail to find the taxa" );
-  }
-  return mapStrToId[str];
-}
-bool TaxaMapper ::IsIdIn(int id) {
-  return mapIdToStr.find(id) != mapIdToStr.end();
-}
-
-string TaxaMapper ::GetString(const int id) {
-  if (mapIdToStr.find(id) == mapIdToStr.end()) {
-    cout << "mapIdToStr: ";
-    for (map<int, string>::iterator it = mapIdToStr.begin();
-         it != mapIdToStr.end(); ++it) {
-      cout << "[" << it->first << "," << it->second << "]  ";
+    // for a large tree, we want to randomly trim some leaves to make the tree smaller
+    // rule: never completely delete a leaf label; prefer deleting
+    // leaves whose label appears more frequently
+    map<int, set<TreeNode *>> mapLeafLblToNodes;
+    vector<TreeNode *> listLeafNodes;
+    ptreeToTrim->GetAllLeafNodes(listLeafNodes);
+    for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+    {
+        int lbl = listLeafNodes[i]->GetIntLabel();
+        if (mapLeafLblToNodes.find(lbl) == mapLeafLblToNodes.end())
+        {
+            set<TreeNode *> ss;
+            mapLeafLblToNodes.insert(map<int, set<TreeNode *>>::value_type(lbl, ss));
+        }
+        mapLeafLblToNodes[lbl].insert(listLeafNodes[i]);
     }
-    cout << endl;
+    // create a list of nodes to remove
+    vector<set<TreeNode *>> listNodesToRemove;
+    vector<double> listNodesToRemoveSz;
+    for (map<int, set<TreeNode *>>::iterator it = mapLeafLblToNodes.begin(); it != mapLeafLblToNodes.end(); ++it)
+    {
+        //
+        listNodesToRemove.push_back(it->second);
+        listNodesToRemoveSz.push_back(it->second.size());
+    }
+    // now start removing
+    int numLeavesCurr = ptreeToTrim->GetNumLeaves();
+    while (numLeavesCurr > numLeavesRemain)
+    {
+        int indexChosen = GetWeightedRandItemIndex(listNodesToRemoveSz);
 
-    cout << "This taxa id: " << id << " seems to be wrong\n";
-    YW_ASSERT_INFO(false, "Fail to find the taxa");
-  }
-  return mapIdToStr[id];
+        if (listNodesToRemoveSz[indexChosen] < 1.01)
+        {
+            // cannot delete the one with only one copy left
+            continue;
+        }
+        YW_ASSERT_INFO(listNodesToRemove[indexChosen].size() >= 2, "Wrong");
+        TreeNode *pnToRm = *(listNodesToRemove[indexChosen].begin());
+        listNodesToRemove[indexChosen].erase(pnToRm);
+        --numLeavesCurr;
+        TreeNode *pnPar = pnToRm->GetParent();
+        ptreeToTrim->RemoveNode(pnToRm);
+        ptreeToTrim->RemoveDegreeOneNodeAt(pnPar);
+        listNodesToRemoveSz[indexChosen] -= 1.0;
+    }
+    AssignConsecutiveIdsForTree(*ptreeToTrim);
+    //cout << "RandTrimLeavesFromTree: After trimming: tree is: ";
+    //string strNW2;
+    //ptreeToTrim->ConsNewick( strNW2, false, 1.0, true );
+    //cout << strNW2 << endl;
 }
 
-string TaxaMapper ::ConvIdStringWithOrigTaxa(const string &strId) {
-#if 0
-cout << "strID: " << strId << ": Num of entries in str mapper : " << mapIdToStr.size() << endl;
-for( map<int,string> :: iterator it =mapIdToStr.begin(); it != mapIdToStr.end(); ++it )
+// ***************************************************************************
+void NewickUtils ::RetrieveLabelSet(const string &strNW, multiset<string> &setLabels)
 {
-cout << it->first << ", " << it->second << endl;
-}
-#endif
-  // convert a string with id (i.e. integer-based identifier) back
-  // to user-specified format
-  // Simple approach: find everything bebetween ( and , (or :),  and ) and
-  // convert to
-  // YW: 05/02/19: also allow '#' as seperator to support mutation tree
-  string res;
-  string strIdDirect = strId;
-  int curpos = 0;
-  int lastposOut = 0;
-  //	char *strIdBuf = (char *)strIdDirect.c_str();
-  while (curpos < (int)strId.length()) {
-    // cout << "curpos = " << curpos << ", res = " << res << endl;
-    bool fIdentifier = false;
-    if ((strId[curpos] == '(' || strId[curpos] == ',' ||
-         strId[curpos] == '#') &&
-        (curpos == (int)strId.length() - 1 ||
-         (strId[curpos + 1] != '(' && strId[curpos + 1] != '#'))) {
-      fIdentifier = true;
-    }
-    // cout << "Adding it: " << strId[curpos] << endl;
-    res += strId[curpos];
-    lastposOut++;
-    curpos++;
-
-    // should we search for id
-    if (fIdentifier == true) {
-      // cout << "Now searching for identifier\n";
-      // now scan to the right to find the position to read the identifier
-      while (curpos < (int)strId.length()) {
-        if (strId[curpos] != ')' && strId[curpos] != ':' &&
-            strId[curpos] != ',' && strId[curpos] != '#') {
-          curpos++;
-        } else {
-          break;
-        }
-      }
-      // cout << "lastposOut: " << lastposOut << ", curpos = " << curpos <<
-      // endl;
-      //
-      // curpos--;
-      int idnum = -1;
-      string strSub = strId.substr(lastposOut, curpos - lastposOut);
-      // char buftmp[100];
-      // memcpy(buftmp, &strIdBuf[lastposOut], curpos-lastposOut );
-      // sscanf(buftmp, "%d", &idnum);
-      sscanf(strSub.c_str(), "%d", &idnum);
-      string idNew = GetString(idnum);
-      ////cout << "After searching, curpos = " << curpos << ", buftmp = " <<
-      /// buftmp  << ", idnum = " << idnum << ", idNew = " << idNew << endl;
-      // cout << "After searching, curpos = " << curpos << ", strSub = " <<
-      // strSub  << ", idnum = " << idnum << ", idNew = " << idNew << endl; char
-      // buf[100]; sprintf(buf, "%d", idNew);
-      res += idNew;
-      lastposOut = curpos;
-    }
-  }
-  return res;
-}
-
-string TaxaMapper ::ExtractIdPartFromStr(const string &strIdNW) {
-  // extract id part of the string
-  string strToUse = strIdNW;
-  size_t posSeparator = strIdNW.find(':');
-
-  if (posSeparator != string::npos) {
-    strToUse = strIdNW.substr(0, (int)posSeparator);
-  }
-  return strToUse;
-}
-
-int TaxaMapper ::GetIdFromStr(const string &strPart, TaxaMapper *pTMapper) {
-  // cout << "GetIdFromStr: " << strPart << endl;
-
-  string strToUse = strPart;
-  size_t posSeparator = strPart.find(':');
-
-  if (posSeparator != string::npos) {
-    strToUse = strPart.substr(0, (int)posSeparator);
-  }
-
-  // 05/07/15: it is also possible user add gene index (in # sign)
-  size_t posSeparator2 = strToUse.find('#');
-  if (posSeparator2 != string::npos) {
-    strToUse = strToUse.substr(0, (int)posSeparator2);
-  }
-  // cout << "strPart: " << strPart << ",strUse: " << strToUse << endl;
-
-  // get rid of
-  int res = -1;
-  if (pTMapper == NULL) {
-    sscanf(strToUse.c_str(), "%d", &res);
-    // cout << "Empty mapper\n";
-  } else {
-    // are we reading in the first tree or not
-    res = pTMapper->GetId(strToUse);
-    // if( pTMapper->IsInitialized() == true )
-    //{
-    //	res  = pTMapper->GetId(strToUse);
-    // cout << "GetIdFromStr: GetId: " << strToUse << ": " << res << endl;
-    //}
-    // else
-    if (res < 0) {
-      // this label is not seen before, so we add a new record
-      // this is new
-      res = pTMapper->AddTaxaString(strToUse);
-      // cout << "GetIdFromStr: New id: " << strToUse << ": " << res << endl;
-    }
-  }
-  return res;
-}
-
-void TaxaMapper ::GetAllTaxaIds(set<int> &taxaIndices) const {
-  //
-  taxaIndices.clear();
-  for (map<int, string>::const_iterator it = mapIdToStr.begin();
-       it != mapIdToStr.end(); ++it) {
-    taxaIndices.insert(it->first);
-  }
-}
-
-void TaxaMapper ::GetAllTaxaStrs(set<string> &setStrs) const {
-  //
-  setStrs.clear();
-  for (map<int, string>::const_iterator it = mapIdToStr.begin();
-       it != mapIdToStr.end(); ++it) {
-    setStrs.insert(it->second);
-  }
-}
-
-void TaxaMapper ::InitToDec1Mode(int numTaxa) {
-  // assume taxa is in the format as 1, 2, 3 and so on
-  // init as follows: 1 ==> 0, 2 ==> 1 and so on
-  for (int taxa = 1; taxa <= numTaxa; ++taxa) {
-    char buf[100];
-    sprintf(buf, "%d", taxa);
-    string strid = buf;
-    AddTaxaString(strid);
-  }
-  SetInitialized(true);
-}
-
-void TaxaMapper ::Dump() const {
-  //
-  cout << "curId = " << curId;
-  if (fInit == true) {
-    cout << "initialized. ";
-  } else {
-    cout << "not initialized yet. ";
-  }
-  for (map<string, int>::const_iterator it = mapStrToId.begin();
-       it != mapStrToId.end(); ++it) {
+    //cout << "RetrieveLabelSet: strNW = " << strNW << endl;
     //
-    cout << "Mapping taxa " << it->first << " to id: " << it->second << "  ";
-  }
-  cout << endl;
-}
+    setLabels.clear();
 
-// ***************************************************************************
-// Tree class functions
-// ***************************************************************************
-TreeNode ::TreeNode()
-    : parent(NULL), id(-1), label("-"), shape(PHY_TN_DEFAULT_SHAPE),
-      lenBranchAbove(-1.0) {}
-
-TreeNode ::TreeNode(int iid)
-    : parent(NULL), id(iid), label("-"), shape(PHY_TN_DEFAULT_SHAPE),
-      lenBranchAbove(-1.0) {
-  //    id = iid;
-  //    cout << "Creating tree node " << iid << endl;
-}
-
-TreeNode ::~TreeNode() {
-  // cout << "Deleting tree node " << id << ", number of children: " <<
-  // GetChildrenNum() << endl; cout << "Dump: "; Dump();
-  // We recursively delete all its children here
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    delete listChildren[i];
-  }
-  listChildren.clear();
-}
-
-void TreeNode::Dump() const {
-  //
-  cout << "<node: " << GetLabel() << ", id=" << GetID();
-  if (lenBranchAbove >= 0.0) {
-    cout << ", length = " << lenBranchAbove;
-  }
-  cout << ", num of child = " << GetChildrenNum() << ">   ";
-}
-
-TreeNode *TreeNode ::Copy() {
-  // make a copy (and its descendents)
-  TreeNode *pCopy = new TreeNode(GetID());
-  pCopy->SetLabel(this->GetLabel());
-  pCopy->SetUserLabel(this->GetUserLabel());
-  pCopy->lenBranchAbove = this->lenBranchAbove;
-  pCopy->nodeValues = this->nodeValues;
-  for (int i = 0; i < GetChildrenNum(); ++i) {
-    TreeNode *pccopy = GetChild(i)->Copy();
-    vector<int> listLbelsCopy;
-    if ((int)this->listEdgeLabels.size() >= i + 1) {
-      listLbelsCopy = this->listEdgeLabels[i];
-    }
-    pCopy->AddChild(pccopy, listLbelsCopy);
-  }
-  return pCopy;
-}
-
-void TreeNode ::AddChild(TreeNode *pChild, const vector<int> &labels) {
-  // This function add an edge. The edge can be labeled with a set of labels
-  // (for now, only integers)
-  YW_ASSERT(pChild != NULL);
-
-  // make sure this child is not already a children
-  // not sure if really need it
-
-  pChild->parent = this;
-  listChildren.push_back(pChild);
-  listEdgeLabels.push_back(labels);
-}
-
-void TreeNode ::AddEdgeLabelToChild(int cIndex, int lbl) {
-  YW_ASSERT_INFO(cIndex < GetChildrenNum(), "Overflow");
-  this->listEdgeLabels[cIndex].push_back(lbl);
-}
-
-void TreeNode ::RemoveChild(TreeNode *pChild) {
-  YW_ASSERT_INFO(pChild != NULL, "RemoveChild: wrong");
-  pChild->parent = NULL;
-  vector<TreeNode *> listChildrenNew;
-  vector<vector<int> > listEdgeLabelsNew;
-  YW_ASSERT_INFO(listChildrenNew.size() == listEdgeLabelsNew.size(),
-                 "must be same size");
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    if (listChildren[i] != pChild) {
-      listChildrenNew.push_back(listChildren[i]);
-      listEdgeLabelsNew.push_back(listEdgeLabels[i]);
-    }
-  }
-  // update
-  listChildren = listChildrenNew;
-  listEdgeLabels = listEdgeLabelsNew;
-}
-
-void TreeNode ::RemoveAllChildren() {
-  // remove all children of this node
-  // listChildren.clear();
-  // listEdgeLabels.clear();
-  while (GetChildrenNum() > 0) {
-    TreeNode *pc = GetChild(0);
-    // cout << "Removing pc = ";
-    // pc->Dump();
-    // cout << endl;
-    RemoveChild(pc);
-  }
-  // cout << "Done with removeallchildren\n";
-}
-
-void TreeNode ::DetachAllChildren() {
-  // diff from RemoveAllChildren, simply detach the children from the parent
-  // (i.e. parent no longer has record for these children)
-  this->listChildren.clear();
-  this->listEdgeLabels.clear();
-}
-
-void TreeNode ::DetachSelf() {
-  // detach this node from parent (but don't perform any memory release)
-  TreeNode *pp = GetParent();
-
-  if (pp != NULL) {
-    //
-    pp->RemoveChild(this);
-  }
-}
-
-void TreeNode ::GetDescendentLabelSet(set<int> &labelSet) {
-  // This function accumulate the set of descendents in the label sets
-  // CAUTION: assume labelset is EMPTY!!!!
-  // if( IsLeaf() == true)
-  //{
-  string lbl = GetLabel();
-  // cout << "lbl = " << lbl << endl;
-
-  if (lbl != "-" && lbl != "?" && lbl != "()" && lbl != "(?)") {
-    const char *buf = lbl.c_str();
-    int rowIndex;
-    if (buf[0] < '0' || buf[0] > '9') {
-      sscanf(buf + 1, "%d", &rowIndex);
-    } else {
-      // This is a plain label, use it
-      sscanf(buf, "%d", &rowIndex);
-    }
-    // cout << "rowIndex = " << rowIndex << endl;
-    labelSet.insert(rowIndex);
-  } else if (nodeValues.size() >= 1) {
-    // simply insert a single value here
-    // labelSet.insert( nodeValues[0] );
-  }
+    string strIdDirect = strNW;
+    int curpos = 0;
+    int lastposOut = 0;
+    //char *strIdBuf = (char *)strIdDirect.c_str();
+    while (curpos < (int)strNW.length())
+    {
+        //cout << "curpos = " << curpos << endl;
+        bool fIdentifier = false;
+        if ((strNW[curpos] == '(' || strNW[curpos] == ',') && (curpos == (int)strNW.length() - 1 || strNW[curpos + 1] != '('))
+        {
+            fIdentifier = true;
+        }
+        //cout << "Adding it: " << strId[curpos] << endl;
+        lastposOut++;
+        curpos++;
 
-#if 0
-        // set every label into the set
-        for(int i=0; i<nodeValues.size(); ++i)
+        // should we search for id
+        if (fIdentifier == true)
         {
-            if( nodeValues[i] >= 0 )
+            //cout << "Now searching for identifier\n";
+            // now scan to the right to find the position to read the identifier
+            while (curpos < (int)strNW.length())
             {
-                labelSet.insert( nodeValues[i] );
+                if (strNW[curpos] != ')' && strNW[curpos] != ':' && strNW[curpos] != ',')
+                {
+                    curpos++;
+                }
+                else
+                {
+                    break;
+                }
             }
+            //
+            //curpos--;
+            string strFoundId;
+            //cout << "lastposOut = " << lastposOut << ", curpos = " << curpos << endl;
+            strFoundId = strNW.substr(lastposOut, curpos - lastposOut);
+            setLabels.insert(strFoundId);
+            lastposOut = curpos;
+            //cout << "One identifier found: " << strFoundId << endl;
         }
-#endif
-  //}
-  // else
-  if (IsLeaf() == false) {
-    for (int i = 0; i < GetChildrenNum(); ++i) {
-      GetChild(i)->GetDescendentLabelSet(labelSet);
     }
-  }
 }
 
-bool TreeNode ::IsAncesterOf(TreeNode *pAssumedDescend, int &branchIndex) {
-  // This function check to see if pAssumedDescend is descedent of the current
-  // node If so, we also find the branch index that comes to this node
-  if (pAssumedDescend == NULL) {
-    return false;
-  }
-  if (pAssumedDescend == this) {
-    branchIndex = -1;
-    return true;
-  }
-
-  TreeNode *pCurrent = pAssumedDescend;
-  TreeNode *pParent = pAssumedDescend->parent;
-
-  while (pParent != NULL) {
-    if (pParent == this) {
-      // Find out which branch leads to it
-      branchIndex = -1;
-      for (int i = 0; i < (int)listChildren.size(); ++i) {
-        if (listChildren[i] == pCurrent) {
-          branchIndex = i;
+bool NewickUtils ::FindSplitIn(const string &strNW, string &strPart1, string &strPart2)
+{
+    // break up the NW into two parts at the first comma found at nesting level 1
+    // return false if there is no such comma (i.e. the string is atomic)
+    int posSplit = -1;
+    int level = 0;
+    for (int i = 0; i < (int)strNW.length(); ++i)
+    {
+        if (strNW[i] == '(')
+        {
+            level++;
+        }
+        else if (strNW[i] == ')')
+        {
+            level--;
+        }
+        else if (strNW[i] == ',')
+        {
+            if (level == 1)
+            {
+                posSplit = i;
+                break;
+            }
         }
-      }
-      YW_ASSERT(branchIndex >= 0);
-      // Tell the good news
-      return true;
     }
-    pCurrent = pParent;
-    pParent = pParent->parent;
-  }
 
-  return false;
-}
+    if (posSplit < 0)
+    {
+        return false;
+    }
+    //
+    int posLeft = strNW.find('(');
+    int posRight = strNW.rfind(')');
+    strPart1 = strNW.substr(posLeft + 1, posSplit - posLeft - 1);
+    strPart2 = strNW.substr(posSplit + 1, posRight - posSplit - 1);
 
-void TreeNode ::GetAllDescendents(set<TreeNode *> &setDescendents) {
-  // Note: include itself
-  setDescendents.insert(this);
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    listChildren[i]->GetAllDescendents(setDescendents);
-  }
+    return true;
 }
 
-void TreeNode ::GetAllLeavesUnder(set<TreeNode *> &setDescendents) {
-  // Note: include itself
-  if (this->IsLeaf() == true) {
-    setDescendents.insert(this);
-  }
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    listChildren[i]->GetAllLeavesUnder(setDescendents);
-  }
-}
-
-void TreeNode ::GetAllLeavesIdUnder(set<int> &setDescendents) {
-  set<TreeNode *> ss;
-  GetAllLeavesUnder(ss);
-  setDescendents.clear();
-  for (set<TreeNode *>::iterator it = ss.begin(); it != ss.end(); ++it) {
-    setDescendents.insert((*it)->GetID());
-  }
-}
-
-void TreeNode ::GetAllDescendIntLbls(set<int> &setIntLbs) {
-  //
-  if (this->IsLeaf() == true) {
-    setIntLbs.insert(this->GetIntLabel());
-  } else {
-    for (int i = 0; i < (int)listChildren.size(); ++i) {
-      listChildren[i]->GetAllDescendIntLbls(setIntLbs);
-    }
-  }
-}
-
-void TreeNode ::GetAllLeafLabeles(vector<string> &listLeafLabels) {
-  //
-  if (IsLeaf() == true) {
-    listLeafLabels.push_back(GetLabel());
-  } else {
-    for (int i = 0; i < (int)listChildren.size(); ++i) {
-      listChildren[i]->GetAllLeafLabeles(listLeafLabels);
-    }
-  }
-}
-void TreeNode ::GetAllLeafIntLabeles(vector<int> &listLeafLabels) {
-  //
-  if (IsLeaf() == true) {
-    listLeafLabels.push_back(GetIntLabel());
-  } else {
-    for (int i = 0; i < (int)listChildren.size(); ++i) {
-      listChildren[i]->GetAllLeafIntLabeles(listLeafLabels);
-    }
-  }
-}
-
-void TreeNode ::GetAllDistinctLeafLabeles(set<string> &setLeafLabels) {
-  //
-  vector<string> listLeafLabels;
-  GetAllLeafLabeles(listLeafLabels);
-  PopulateSetByVecGen(setLeafLabels, listLeafLabels);
-}
-
-string TreeNode ::GetShapeLabel(const set<int> &idTerms,
-                                map<int, int> &mapNodeLabel) const {
-  // cout << "idTerms = ";
-  // DumpIntSet( idTerms );
-  string res;
-
-  // return a shape label:
-  // at present, shape label is like ((),(())). That is, no leaf labels
-  // just the type of topology. Note if we have (S1,S2), then S1 <= S2
-  if (idTerms.find(GetID()) != idTerms.end()) {
-    int idNum = 1;
-    if (mapNodeLabel.find(GetID()) != mapNodeLabel.end()) {
-      idNum = mapNodeLabel[GetID()];
+void NewickUtils ::UpdateLabells(string &strNW, const map<string, string> &mapOldLabelToNew)
+{
+    // replace each taxon label in the Newick string with the new label recorded in the map
+    string strNWNew;
+    string strIdDirect = strNW;
+    int curpos = 0;
+    int lastposOut = 0;
+    map<string, string> &mapOldLabelToNewRef = const_cast<map<string, string> &>(mapOldLabelToNew);
+    //bool fOutputCurChar = true;
+    //char *strIdBuf = (char *)strIdDirect.c_str();
+    while (curpos < (int)strNW.length())
+    {
+        //cout << "curpos = " << curpos << endl;
+        bool fIdentifier = false;
+        if ((strNW[curpos] == '(' || strNW[curpos] == ',') && (curpos == (int)strNW.length() - 1 || strNW[curpos + 1] != '('))
+        {
+            fIdentifier = true;
+        }
+
+        // add it always since this is a delimiter
+        strNWNew += strNW[curpos];
+
+        //cout << "Adding it: " << strId[curpos] << endl;
+        lastposOut++;
+        curpos++;
+
+        // should we search for id
+        if (fIdentifier == true)
+        {
+            //cout << "Now searching for identifier\n";
+            // now scan to the right to find the position to read the identifier
+            while (curpos < (int)strNW.length())
+            {
+                if (strNW[curpos] != ')' && strNW[curpos] != ':' && strNW[curpos] != ',')
+                {
+                    curpos++;
+                }
+                else
+                {
+                    break;
+                }
+            }
+            //
+            //curpos--;
+            string strFoundId;
+            //cout << "lastposOut = " << lastposOut << ", curpos = " << curpos << endl;
+            strFoundId = strNW.substr(lastposOut, curpos - lastposOut);
+
+            //
+            YW_ASSERT_INFO(mapOldLabelToNew.find(strFoundId) != mapOldLabelToNew.end(), "Fail to find the id in the map");
+            strNWNew.append(mapOldLabelToNewRef[strFoundId]);
+
+            lastposOut = curpos;
+            //cout << "One identifier found: " << strFoundId << endl;
+
+            // now move back by one letter
+            //--curpos;
+        }
     }
-    char buf[100];
-    sprintf(buf, "%d", idNum);
-    res = buf;
-    // string str1 = "A";
-    // return str1;
-  }
-  // else
-  //	{
-  //		string strEmpty;
-  //		res = strEmpty;
-  //	}
-  //}
-  else {
-    // otherwise get its descendent
-    vector<string> listLabels;
-    for (int i = 0; i < (int)listChildren.size(); ++i) {
-      listLabels.push_back(
-          listChildren[i]->GetShapeLabel(idTerms, mapNodeLabel));
-    }
-    // now sort it
-    for (int i = 0; i < (int)listLabels.size(); ++i) {
-      for (int j = i + 1; j < (int)listLabels.size(); ++j) {
-        // swap if needed
-        if (listLabels[i] > listLabels[j]) {
-          string tmp = listLabels[i];
-          listLabels[i] = listLabels[j];
-          listLabels[j] = tmp;
-        }
-      }
-    }
-
-    // how many are not empty?
-    int numNonEmpty = 0;
-    for (int i = 0; i < (int)listLabels.size(); ++i) {
-      if (listLabels[i].length() > 0) {
-        numNonEmpty++;
-      }
-    }
-
-    // add it
-    bool fStart = false;
-    for (vector<string>::iterator it = listLabels.begin();
-         it != listLabels.end(); ++it) {
-      if (it->length() > 0) {
-        if (fStart == false) {
-          if (numNonEmpty > 1) {
-            // add a header
-            res = "(";
-          }
-        } else {
-          res += ",";
-        }
-        res += *it;
-
-        fStart = true;
-      }
-    }
-    if (fStart == true && numNonEmpty > 1)
-    // if( fStart == true  )
-    {
-      res += ")";
-    }
-  }
-  // cout << "res label for this node: " << res << endl;
-  return res;
+
+    //cout << "UpdateLabells: before update, newick = " << strNW << ", after update: " << strNWNew << endl;
+    strNW = strNWNew;
 }
 
-// differeent from above, this one will apply label to the string label
-string TreeNode ::GetShapeLabel(const set<int> &idTerms, bool fSort) const {
-  // cout << "idTerms = ";
-  // DumpIntSet( idTerms );
-  string res;
-
-  // return a shape label:
-  // at present, shape label is like ((),(())). That is, no leaf labels
-  // just the type of topology. Note if we have (S1,S2), then S1 <= S2
-  if (idTerms.find(GetID()) != idTerms.end()) {
-    // int idNum = 1;
-    if (fSort == true) {
-      res = "1";
-    } else {
-      char buf[100];
-      sprintf(buf, "%d", GetID());
-      res = buf;
-    }
-  }
-
-  else {
-    // otherwise get its descendent
-    vector<string> listLabels;
-    for (int i = 0; i < (int)listChildren.size(); ++i) {
-      listLabels.push_back(listChildren[i]->GetShapeLabel(idTerms, fSort));
-    }
-    // now sort it
-    if (fSort == true) {
-      for (int i = 0; i < (int)listLabels.size(); ++i) {
-        for (int j = i + 1; j < (int)listLabels.size(); ++j) {
-          // swap if needed
-          if (listLabels[i] > listLabels[j]) {
-            string tmp = listLabels[i];
-            listLabels[i] = listLabels[j];
-            listLabels[j] = tmp;
-          }
-        }
-      }
-    }
-
-    // how many are not empty?
-    int numNonEmpty = 0, numEmpty = 0;
-    for (int i = 0; i < (int)listLabels.size(); ++i) {
-      if (listLabels[i].length() > 0) {
-        numNonEmpty++;
-      } else {
-        numEmpty++;
-      }
-    }
-
-    // add it
-    bool fStart = false;
-    // bool fFirst = true;
-    bool fParenth = false;
-    // bool fSpaceAdded = false;
-    for (vector<string>::iterator it = listLabels.begin();
-         it != listLabels.end(); ++it) {
-      // YW: only add "(" if there are more than 1 non-empty below
-      if (fStart == false && it->length() > 0) {
-        // YW: just add a "("
-        // if(  (numNonEmpty >= 1 && numEmpty > 0 ) || numNonEmpty >= 2  )
-        //{
-        // add a header
-        if (numNonEmpty > 1) {
-          res = "(";
-          fParenth = true;
+string NewickUtils ::RemoveBrLenFromTree(string &strNW)
+{
+    //
+    int curpos = 0;
+    bool fSkip = false;
+    string strNWNew;
+    //char *strIdBuf = (char *)strIdDirect.c_str();
+    while (curpos < (int)strNW.length())
+    {
+        //cout << "curpos = " << curpos << endl;
+        if (strNW[curpos] == ':')
+        {
+            fSkip = true;
         }
-        res += *it;
-        fStart = true;
-        //}
-      } else if (fStart == true) {
-        // YW: only add "," if there is something
-        if (it->length() > 0) {
-          res += ",";
+        else if (strNW[curpos] == ',' || strNW[curpos] == ')' || strNW[curpos] == ';')
+        {
+            // stop skipping once a separator is reached: , or ) or ;
+            fSkip = false;
         }
-        // fFirst = false;
-        if (it->length() > 0) {
 
-          res += *it;
-          // fStart = true;
+        if (fSkip == false)
+        {
+            strNWNew += strNW[curpos];
         }
-        // YW: donot add anything if the branch is empty
-#if 0
-				else
-				{
-					// for empty branches, put a mark to it
-					// when there is something under it (that is shrink the entire subtree of unknown to a symbol -
-					if(numNonEmpty >= 1 && fSpaceAdded == false)
-					{
-						//
-						res += ",-";
-						fSpaceAdded = true;
-					}
-				}
-#endif
-      }
-    }
-    // if( fStart == true  && numNonEmpty >= 1)
-    if (fParenth == true) {
-      res += ")";
-    }
-  }
-  // cout << "res label for this node: " << res << endl;
-  return res;
-}
-
-string TreeNode::GetShapeLabelNodeBrNum(
-    map<TreeNode *, pair<int, int> > &mapNodeNumBrannches,
-    vector<int> &listOrderedLeaves) {
-  // format: <num of underlying branches, event id>, negative for internal nodes
-  // the ordered leaves: correspond to their order of appearing in the output
-  // newick shape string this can be useful when you want to know how to match
-  // the leaves when some sort of comparision is needed get shape label.
-  // Different from above, the input is: <treenode, #ofbranches out of this
-  // node> convention: if #br < 0, it means all branches have descendents
-  listOrderedLeaves.clear();
-  if (this->IsLeaf() == true) {
-    YW_ASSERT_INFO(mapNodeNumBrannches.find(this) != mapNodeNumBrannches.end(),
-                   "Leaf: not in map");
-    // cout << "Find one leaf: " << mapNodeNumBrannches[this].second << endl;
-    listOrderedLeaves.push_back(mapNodeNumBrannches[this].second);
-    return string("()");
-  } else {
-    YW_ASSERT_INFO(mapNodeNumBrannches.find(this) != mapNodeNumBrannches.end(),
-                   "Fail to find222");
-    // const TreeNode *pn = const_cast<const TreeNode *>( this );
-    int numBrWOChildRecur = mapNodeNumBrannches[this].first;
-    // cout << "numBrWOChildRecur = " << numBrWOChildRecur << endl;
-    multiset<string> setDescStrings;
-    map<string, set<vector<int> > > mapStringToVecLeaves;
-    for (int i = 0; i < (int)GetChildrenNum(); ++i) {
-      //
-      TreeNode *pnchild = GetChild(i);
-      //
-      if (mapNodeNumBrannches.find(pnchild) != mapNodeNumBrannches.end()) {
-        //
-        vector<int> listOrderedLeavesStep;
-        string str = pnchild->GetShapeLabelNodeBrNum(mapNodeNumBrannches,
-                                                     listOrderedLeavesStep);
-        setDescStrings.insert(str);
-        if (mapStringToVecLeaves.find(str) == mapStringToVecLeaves.end()) {
-          //
-          set<vector<int> > ssint;
-          mapStringToVecLeaves.insert(
-              map<string, set<vector<int> > >::value_type(str, ssint));
-        }
-        mapStringToVecLeaves[str].insert(listOrderedLeavesStep);
 
-        //
-        --numBrWOChildRecur;
-      }
+        curpos++;
     }
-    // add the remaiing by just filling the item
-    // vector<int> listLvIds;
-    for (int i = 0; i < numBrWOChildRecur; ++i) {
-      string strLv = "()";
-      setDescStrings.insert(strLv);
+    return strNWNew;
+}
 
-      //
-      if (mapStringToVecLeaves.find(strLv) == mapStringToVecLeaves.end()) {
-        //
-        set<vector<int> > ssint;
-        mapStringToVecLeaves.insert(
-            map<string, set<vector<int> > >::value_type(strLv, ssint));
-      }
-      vector<int> vec1;
-      vec1.push_back(mapNodeNumBrannches[this].second);
-      mapStringToVecLeaves[strLv].insert(vec1);
-    }
-    // cout << "setdescstrings: ";
-    // for(multiset<string> :: iterator itgg = setDescStrings.begin(); itgg !=
-    // setDescStrings.end(); ++itgg)
-    //{
-    // cout << *itgg << "   ";
-    //}
-    // cout << endl;
-    // now creat the contacation
-    YW_ASSERT_INFO(setDescStrings.size() > 1, "Can not be empty2");
-    string res = "(";
-    for (multiset<string>::iterator it = setDescStrings.begin();
-         it != setDescStrings.end(); ++it) {
-      if (it != setDescStrings.begin()) {
-        res += ",";
-      }
-      res += *it;
-    }
-    res += ")";
-
-    // now assemble the list of ordered nodes
-    for (map<string, set<vector<int> > >::iterator itg =
-             mapStringToVecLeaves.begin();
-         itg != mapStringToVecLeaves.end(); ++itg) {
-      for (set<vector<int> >::iterator itg2 = itg->second.begin();
-           itg2 != itg->second.end(); ++itg2) {
-        // cout << "In GetShapeLabelNodeBrNum: find a vector of sites: ";
-        // DumpIntVec(*itg2);
-        ConcatIntVec(listOrderedLeaves, *itg2);
-      }
+void NewickUtils ::ConsolidateSinglChildChain(string &strNW)
+{
+    if (strNW[0] != '(')
+    {
+        // nothing needs to be done
+        return;
     }
 
-    return res;
-  }
-}
-
-int TreeNode ::GetLevel() const {
-  // choose a not efficient but simple coding
-  int res = 0;
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    int lvDesc = listChildren[i]->GetLevel();
-    if (lvDesc + 1 > res) {
-      res = lvDesc + 1;
-    }
-  }
-  return res;
-}
-
-void TreeNode ::GetEdgeLabelsToChild(TreeNode *pChild, vector<int> &lbls) {
-  YW_ASSERT_INFO(listChildren.size() == listEdgeLabels.size(),
-                 "Child num and edge label num do not match");
-  lbls.clear();
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    if (listChildren[i] == pChild) {
-      GetEdgeLabelsAtBranch(i, lbls);
-    }
-  }
-  // YW_ASSERT_INFO(false, "GetEdgeLabelsToChild :: Fail to find such child");
-}
-
-TreeNode *TreeNode ::GetMRCA(TreeNode *pOther) {
-  TreeNode *pRes = this;
-  int dummy;
-  while (pRes != NULL && pRes->IsAncesterOf(pOther, dummy) == false) {
-    pRes = pRes->GetParent();
-  }
-  YW_ASSERT_INFO(pRes != NULL, "Fail to find MRCA");
-  return pRes;
-}
-
-int TreeNode ::GetNumEdgesToAncestor(TreeNode *pAssumedAncestor) {
-  // get # of edges betwene this node to its ancestor
-  // return -1 if the ancestor is not true ancestor
-  int res = 0;
-  TreeNode *pRes = this;
-  while (pRes != NULL && pRes != pAssumedAncestor) {
-    ++res;
-    pRes = pRes->GetParent();
-  }
-  if (pRes == NULL) {
-    res = -1;
-  }
-
-  return res;
-}
-
-void TreeNode ::GetSiblings(vector<TreeNode *> &listSibs) {
-  // siblings are parent's children (except itself)
-  listSibs.clear();
-  if (this->GetParent() != NULL) {
-    //
-    for (int i = 0; i < this->GetParent()->GetChildrenNum(); ++i) {
-      TreeNode *pn = this->GetParent()->GetChild(i);
-      if (pn != this) {
-        listSibs.push_back(pn);
-      }
-    }
-  }
-}
-
-void TreeNode ::Order() {
-  // do nothing if leaf
-  if (IsLeaf() == true) {
-    return;
-  }
-  // first order the leaves
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    listChildren[i]->Order();
-  }
-
-  //
-  vector<multiset<string> > listDescLeaves;
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    vector<string> vecLeafStrings;
-    listChildren[i]->GetAllLeafLabeles(vecLeafStrings);
-    multiset<string> setLeafStrings;
-    for (int j = 0; j < (int)vecLeafStrings.size(); ++j) {
-      setLeafStrings.insert(vecLeafStrings[j]);
-    }
-    listDescLeaves.push_back(setLeafStrings);
-  }
-  //
-  YW_ASSERT_INFO(listEdgeLabels.size() == listChildren.size(),
-                 "Same size must be");
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    for (int j = i + 1; j < (int)listChildren.size(); ++j) {
-      //
-      if (listDescLeaves[i] > listDescLeaves[j]) {
-        // exhcnage everything
-        TreeNode *ptmp = listChildren[i];
-        listChildren[i] = listChildren[j];
-        listChildren[j] = ptmp;
-
-        vector<int> vtmp = listEdgeLabels[i];
-        listEdgeLabels[i] = listEdgeLabels[j];
-        listEdgeLabels[j] = vtmp;
+    //cout << "conslidate: " << strNW << endl;
+    // sometimes there may be a nested chain of enclosing parentheses;
+    // consolidate these, and maintain the proper branch length if there is any
+    string strRes = strNW;
+    double lenTot = 0.0;
+    bool fLen = false;
+    //bool fParenthRemoved = false;
 
-        //
-        multiset<string> stmp = listDescLeaves[i];
-        listDescLeaves[i] = listDescLeaves[j];
-        listDescLeaves[j] = stmp;
-      }
-    }
-  }
-}
+    while (true)
+    {
+        //cout << "current string: " << strRes << endl;
+        // stop if it becomes atomic
+        string str1, str2;
+        bool fNonAtom = FindSplitIn(strRes, str1, str2);
 
-int TreeNode ::GetIntLabel() const {
-  int res = -1;
-  sscanf(label.c_str(), "%d", &res);
-  return res;
-}
+        //if( fNonAtom == false )
+        //{
+        //fParenthRemoved = true;
 
-void TreeNode ::SetIntLabel(int lbl) {
-  //
-  char buf[1024];
-  sprintf(buf, "%d", lbl);
-  label = buf;
-}
+        //
+        YW_ASSERT_INFO(strRes[0] == '(', "wrong");
+        int posRight = strRes.rfind(')');
+        YW_ASSERT_INFO(posRight > 0, "wrong1");
+        //cout << "posRight: " << posRight << endl;
+        if (posRight != (int)strRes.length() - 1)
+        {
+            int posLen = strRes.find(':', posRight);
+            //cout << "posLen: " << posLen << endl;
+            if (posLen > 0)
+            {
+                //if( lenTot > 0.0)
+                //{
+                //cout << "*HHHHH\n";
+                //}
+                fLen = true;
+                lenTot += GetLenAt(strRes, posLen + 1);
+                //cout << "lenTot: " << lenTot << endl;
+            }
+        }
+        //}
 
-bool TreeNode ::IsMulfurcate() {
-  if (IsLeaf() == true) {
-    return false;
-  } else {
-    if (GetChildrenNum() > 2) {
-      return true;
+        int len = posRight - 1;
+        strRes = strRes.substr(1, len);
+
+        if (fNonAtom == true)
+        {
+            break;
+        }
     }
-    for (int ii = 0; ii < GetChildrenNum(); ++ii) {
-      if (GetChild(ii)->IsMulfurcate() == true) {
-        return true;
-      }
+    string strRes1;
+    //if( fParenthRemoved )
+    //{
+    strRes1 += "(";
+    //}
+    strRes1 += strRes;
+    //if( fParenthRemoved )
+    //{
+    strRes1 += ")";
+    //}
+    if (fLen)
+    {
+        strRes1 += ":" + std::to_string(lenTot);
     }
 
-    return false;
-  }
-}
-
-TreeNode *TreeNode ::GetRoot() const {
-  TreeNode *pself = const_cast<TreeNode *>(this);
-  TreeNode *proot = pself;
-  while (proot->GetParent() != NULL) {
-    proot = proot->GetParent();
-  }
-  YW_ASSERT_INFO(proot != NULL, "Root is null");
-  return proot;
-}
-
-void TreeNode ::GetAllAncestors(set<TreeNode *> &listAncestors) {
-  if (GetParent() != NULL) {
-    listAncestors.insert(GetParent());
-    GetParent()->GetAllAncestors(listAncestors);
-  }
-}
-
-void TreeNode ::GetAllChildren(set<TreeNode *> &setChildren) const {
-  //
-  // TreeNode *pthis = const_cast<TreeNode *>(this);
-  // PopulateSetByVecGen( setChildren, pthis->listChildren );
-  setChildren.clear();
-  for (int i = 0; i < GetChildrenNum(); ++i) {
-    setChildren.insert(listChildren[i]);
-  }
-}
-
-int TreeNode ::GetChildIndex(TreeNode *pchild) const {
-  // get the index of this particular child; if not found, the error
-  TreeNode *pself = const_cast<TreeNode *>(this);
-  int res = -1;
-  for (int i = 0; i < (int)listChildren.size(); ++i) {
-    if (pself->GetChild(i) == pchild) {
-      res = i;
-      break;
-    }
-  }
-  YW_ASSERT_INFO(res >= 0, "Fail to find666");
-  return res;
-}
-
-void TreeNode ::RemoveLabels() {
-  // remove all edge labels (i.e. make them empty)
-  int numLLs = listEdgeLabels.size();
-  listEdgeLabels.clear();
-  listEdgeLabels.resize(numLLs);
-
-  // then reurrisve do it
-  for (int i = 0; i < GetChildrenNum(); ++i) {
-    GetChild(i)->RemoveLabels();
-  }
-}
-
-void TreeNode ::RemoveLabelsPar() {
-  // remove the parent to this node's label
-  TreeNode *ppar = GetParent();
-  if (ppar == NULL) {
-    return;
-  }
-  int childIndex = ppar->GetChildIndex(this);
-  YW_ASSERT_INFO(childIndex < (int)ppar->listEdgeLabels.size(), "Overflow");
-  ppar->listEdgeLabels[childIndex].clear();
-}
-
-void TreeNode ::IncEdgeLabelsBy(int offset, bool fSub) {
-  //
-  for (int i = 0; i < (int)listEdgeLabels.size(); ++i) {
-    for (int j = 0; j < listEdgeLabels[i].size(); ++j) {
-      listEdgeLabels[i][j] += offset;
-    }
-  }
-  if (fSub) {
-    for (int i = 0; i < (int)listChildren.size(); ++i) {
-      listChildren[i]->IncEdgeLabelsBy(offset, fSub);
-    }
-  }
-}
-
-void TreeNode ::Binarize(int &idToUseNext) {
-  // recursively make the tree binary
-  // if this node has more than 2 children, create a new internal node
-  if (GetChildrenNum() > 2) {
-    //
-    TreeNode *pnode = new TreeNode(idToUseNext++);
-    for (int i = 1; i < GetChildrenNum(); ++i) {
-      vector<int> ss;
-      pnode->AddChild(GetChild(i), ss);
-    }
-    TreeNode *pn1 = GetChild(0);
-    this->listChildren.clear();
-    this->listChildren.push_back(pn1);
-    vector<int> ss;
-    AddChild(pnode, ss);
-  }
+    strNW = strRes1;
+    //cout << "conslidate to " << strNW << endl;
+}
 
-  for (int i = 0; i < GetChildrenNum(); ++i) {
+double NewickUtils ::GetLenAt(const string &strNW, int posLen)
+{
     //
-    GetChild(i)->Binarize(idToUseNext);
-  }
-}
-
-int TreeNode ::GetMaxIdWithinSubtree() const {
-  //
-  int res = GetID();
-  TreeNode *pthis = const_cast<TreeNode *>(this);
-  for (int i = 0; i < GetChildrenNum(); ++i) {
-    TreeNode *pnc = pthis->GetChild(i);
-    int nc = pnc->GetMaxIdWithinSubtree();
-    if (nc > res) {
-      //
-      res = nc;
-    }
-  }
-  return res;
-}
-
-int TreeNode ::GetNumNodesUnder(bool fInternalOnly, bool fAddNonBinary) const {
-  // fInternalOnly: true if only count internal node
-  // include itself if this is an internal node
-  // fAddNonBinary: true if an internal node is considered to have multiple
-  // (hidden) nodes
-  int res = 0;
-  if (fInternalOnly == false || IsLeaf() == false) {
-    res = 1;
-  }
-  // recursively check all children
-  TreeNode *pn = const_cast<TreeNode *>(this);
-  for (int i = 0; i < GetChildrenNum(); ++i) {
-    res += pn->GetChild(i)->GetNumNodesUnder(fInternalOnly, fAddNonBinary);
-  }
-  return res;
+    int posLenEnd = strNW.length() - 1;
+    int sepPos1 = strNW.find(',', posLen);
+    int sepPos2 = strNW.find(')', posLen);
+    if (sepPos1 > 0 && sepPos1 - 1 < posLenEnd)
+    {
+        posLenEnd = sepPos1 - 1;
+    }
+    if (sepPos2 > 0 && sepPos2 - 1 < posLenEnd)
+    {
+        posLenEnd = sepPos2 - 1;
+    }
+    if (posLenEnd <= posLen)
+    {
+        cout << "posLen: " << posLen << ", posLenEnd: " << posLenEnd << ", tree: " << strNW << endl;
+    }
+    YW_ASSERT_INFO(posLenEnd >= posLen, "No length found");
+    string lenstr = strNW.substr(posLen, posLenEnd - posLen + 1);
+    return atof(lenstr.c_str());
 }
 
 // ***************************************************************************
-// Utilites functions
-// ***************************************************************************
-
-void PhylogenyTreeIteratorBacktrack ::Init() {
-  while (stackNodesToExplore.empty() == false) {
-    stackNodesToExplore.pop();
-  }
-  // cout << "Nnow stack empty.\n";
-  // Now recurisvely store the order of the walk
-  TreeNode *rootNode = phyTree.GetRoot();
-  if (rootNode != NULL) {
-    stackNodesToExplore.push(rootNode);
-  }
-}
-
-void PhylogenyTreeIteratorBacktrack ::Next() {
-  if (stackNodesToExplore.empty() == true) {
-    return;
-  }
-  TreeNode *pn = stackNodesToExplore.top();
-  // push its descendent in
-  stackNodesToExplore.pop();
-  for (int i = 0; i < (int)pn->GetChildrenNum(); ++i) {
-    //
-    stackNodesToExplore.push(pn->GetChild(i));
-  }
-}
-void PhylogenyTreeIteratorBacktrack ::Back() {
-  if (stackNodesToExplore.empty() == true) {
-    return;
-  }
-  // simply get rid of the current node
-  stackNodesToExplore.pop();
-}
 
-bool PhylogenyTreeIteratorBacktrack ::IsDone() {
-  return stackNodesToExplore.empty();
+TaxaMapper ::TaxaMapper()
+{
+    curId = 0; // next id that AddTaxaString() assigns to a new taxa string
+    fInit = false; // Dump() reports this state as "not initialized yet"
 }
 
-TreeNode *PhylogenyTreeIteratorBacktrack ::GetCurrNode() {
-  if (IsDone() == false) {
-    return stackNodesToExplore.top();
-  } else {
-    return NULL;
-  }
+// utility: true when no taxa string has been recorded in mapStrToId
+bool TaxaMapper ::IsEmpty()
+{
+    return mapStrToId.size() == 0;
 }
 
-///////////////////////////////////////////////////////////////////
-void PhylogenyTreeIterator ::Init() {
-  while (stackPostorder.empty() == false) {
-    stackPostorder.pop();
-  }
-  // cout << "Nnow stack empty.\n";
-  // Now recurisvely store the order of the walk
-  TreeNode *rootNode = phyTree.GetRoot();
-  if (rootNode != NULL) {
-    phyTree.PostOrderPushStack(rootNode, stackPostorder);
-  }
+int TaxaMapper ::AddTaxaString(const string &str)
+{
+    // Register str under the next free id (curId) unless it is already known; returns the id mapped to str.
+    if (mapStrToId.find(str) == mapStrToId.end())
+    {
+        mapStrToId.insert(map<string, int>::value_type(str, curId));
+        mapIdToStr.insert(map<int, string>::value_type(curId, str));
+        curId++;
+    }
+    // Both the new and the already-known case end up here:
+    // str is present in mapStrToId by now, so operator[] is a pure lookup.
+    return mapStrToId[str];
+    // (hence no else-branch is needed)
 }
 
-void PhylogenyTreeIterator ::Next() {
-  if (stackPostorder.empty() == true) {
-    return;
-  }
-  // TreeNode *pn = stackPostorder.top();
-  stackPostorder.pop();
+void TaxaMapper ::AddTaxaStringWithId(int tid, const string &str)
+{
+    // record str <-> tid with a caller-chosen id; curId is not advanced here, so don't mix this with AddTaxaString's auto-id mode
+    mapStrToId.insert(map<string, int>::value_type(str, tid));
+    mapIdToStr.insert(map<int, string>::value_type(tid, str));
 }
 
-bool PhylogenyTreeIterator ::IsDone() { return stackPostorder.empty(); }
+int TaxaMapper ::GetId(const string &str)
+{
+    //cout << "Num of entries in str mapper : " << mapStrToId.size() << endl;
+    //for( map<string,int> :: iterator it =mapStrToId.begin(); it != mapStrToId.end(); ++it )
+    //{
+    //cout << it->first << ", " << it->second << endl;
+    //}
 
-TreeNode *PhylogenyTreeIterator ::GetCurrNode() {
-  if (IsDone() == false) {
-    return stackPostorder.top();
-  } else {
-    return NULL;
-  }
+    if (mapStrToId.find(str) == mapStrToId.end())
+    {
+        // when the str is not pre-recorded, return negative value
+        return -1;
+        //cout << "This taxa: " << str << " seems to be wrong\n";
+        //YW_ASSERT_INFO( false, "Fail to find the taxa" );
+    }
+    return mapStrToId[str];
+}
+bool TaxaMapper ::IsIdIn(int id)
+{
+    return mapIdToStr.find(id) != mapIdToStr.end(); // true if a taxa string is recorded under this id
 }
 
-// ***************************************************************************
-// Main functions
-// ***************************************************************************
-
-PhylogenyTreeBasic ::PhylogenyTreeBasic() : rootNode(NULL), numLeaves(-1) {}
-
-PhylogenyTreeBasic ::~PhylogenyTreeBasic() {
-  // cout << "Deleting tree: ";
-  // Dump();
+string TaxaMapper ::GetString(const int id)
+{
+    if (mapIdToStr.find(id) == mapIdToStr.end())
+    {
+        cout << "mapIdToStr: ";
+        for (map<int, string>::iterator it = mapIdToStr.begin(); it != mapIdToStr.end(); ++it)
+        {
+            cout << "[" << it->first << "," << it->second << "]  ";
+        }
+        cout << endl;
 
-  // Should delete the tree
-  if (rootNode != NULL) {
-    delete rootNode;
-    rootNode = NULL;
-  }
+        cout << "This taxa id: " << id << " seems to be wrong\n";
+        YW_ASSERT_INFO(false, "Fail to find the taxa");
+    }
+    return mapIdToStr[id];
 }
 
-PhylogenyTreeBasic *PhylogenyTreeBasic ::Copy() {
-  PhylogenyTreeBasic *pCopy = new PhylogenyTreeBasic;
-  pCopy->numLeaves = pCopy->numLeaves;
-  pCopy->SetRoot(this->GetRoot()->Copy());
-  return pCopy;
+string TaxaMapper ::ConvIdStringWithOrigTaxa(const string &strId)
+{
+#if 0
+cout << "strID: " << strId << ": Num of entries in str mapper : " << mapIdToStr.size() << endl;
+for( map<int,string> :: iterator it =mapIdToStr.begin(); it != mapIdToStr.end(); ++it )
+{
+cout << it->first << ", " << it->second << endl;
 }
+#endif
+    // convert a string with ids (i.e. integer-based identifiers) back
+    // to the user-specified format
+    // Simple approach: find everything between ( and , (or :),  and ) and convert it
+    // YW: 05/02/19: also allow '#' as separator to support mutation tree
+    string res;
+    string strIdDirect = strId;
+    int curpos = 0;
+    int lastposOut = 0;
+    //	char *strIdBuf = (char *)strIdDirect.c_str();
+    while (curpos < (int)strId.length())
+    {
+        //cout << "curpos = " << curpos << ", res = " << res << endl;
+        bool fIdentifier = false;
+        if ((strId[curpos] == '(' || strId[curpos] == ',' || strId[curpos] == '#') && (curpos == (int)strId.length() - 1 || (strId[curpos + 1] != '(' && strId[curpos + 1] != '#')))
+        {
+            fIdentifier = true;
+        }
+        //cout << "Adding it: " << strId[curpos] << endl;
+        res += strId[curpos];
+        lastposOut++;
+        curpos++;
 
-void PhylogenyTreeBasic ::PostOrderPushStack(
-    TreeNode *treeNode, stack<TreeNode *> &stackPostorder) {
-  stackPostorder.push(treeNode);
-  // cout << "Pusing node " << treeNode->GetLabel() << endl;
-
-  for (int i = 0; i < (int)treeNode->listChildren.size(); ++i) {
-    PostOrderPushStack(treeNode->listChildren[i], stackPostorder);
-  }
+        // should we search for id
+        if (fIdentifier == true)
+        {
+            //cout << "Now searching for identifier\n";
+            // now scan to the right to find the position to read the identifier
+            while (curpos < (int)strId.length())
+            {
+                if (strId[curpos] != ')' && strId[curpos] != ':' && strId[curpos] != ',' && strId[curpos] != '#')
+                {
+                    curpos++;
+                }
+                else
+                {
+                    break;
+                }
+            }
+            //cout << "lastposOut: " << lastposOut << ", curpos = " << curpos << endl;
+            //
+            //curpos--;
+            int idnum = -1;
+            string strSub = strId.substr(lastposOut, curpos - lastposOut);
+            //char buftmp[100];
+            //memcpy(buftmp, &strIdBuf[lastposOut], curpos-lastposOut );
+            //sscanf(buftmp, "%d", &idnum);
+            sscanf(strSub.c_str(), "%d", &idnum);
+            string idNew = GetString(idnum);
+            ////cout << "After searching, curpos = " << curpos << ", buftmp = " << buftmp  << ", idnum = " << idnum << ", idNew = " << idNew << endl;
+            //cout << "After searching, curpos = " << curpos << ", strSub = " << strSub  << ", idnum = " << idnum << ", idNew = " << idNew << endl;
+            //char buf[100];
+            //sprintf(buf, "%d", idNew);
+            res += idNew;
+            lastposOut = curpos;
+        }
+    }
+    return res;
 }
 
-void PhylogenyTreeBasic ::ConsOnNewick(const string &nwString, int numLeaves,
-                                       bool fBottomUp, TaxaMapper *pTMapper) {
-  // Here we try to reconstruct from a newick string here
-  // This function creates the tree by creating and linking tree nodes
-  // Make sure the tree is empty
-  if (rootNode != NULL) {
-    delete rootNode;
-    rootNode = NULL;
-  }
-
-  // we perform this by recursively
-  int invId = 1000000;
-  if (numLeaves > 0) {
-    // here we assume leaf id starts from 0, will check it
-    invId = numLeaves;
-  }
-  int leafId = 0;
-  rootNode = ConsOnNewickSubtree(nwString, leafId, invId, numLeaves, fBottomUp,
-                                 pTMapper);
-}
-
-void PhylogenyTreeBasic ::ConsOnNewickDupLabels(const string &nwString,
-                                                TaxaMapper *pTMapper) {
-  // Here we try to reconstruct from a newick string here
-  // This function creates the tree by creating and linking tree nodes
-  // Make sure the tree is empty
-  if (rootNode != NULL) {
-    delete rootNode;
-    rootNode = NULL;
-  }
+string TaxaMapper ::ExtractIdPartFromStr(const string &strIdNW)
+{
+    // extract id part of the string
+    string strToUse = strIdNW;
+    size_t posSeparator = strIdNW.find(':');
 
-  // we perform this by recursively
-  int numLeaves = GetNewickNumLeaves(nwString);
-  // we start counting leaves from 0
-  int invId = numLeaves;
-  int leafId = 0;
-  // cout << "Num of leaves = " << numLeaves << endl;
-  rootNode = ConsOnNewickSubtreeDupLabels(nwString, invId, leafId, pTMapper);
+    if (posSeparator != string::npos)
+    {
+        strToUse = strIdNW.substr(0, (int)posSeparator);
+    }
+    return strToUse;
 }
 
-// ********************************************************************************
-// Utitlieis for construcing edge label trees
+int TaxaMapper ::GetIdFromStr(const string &strPart, TaxaMapper *pTMapper)
+{
+    //cout << "GetIdFromStr: " << strPart << endl;
 
-static int GetEdgeLabelPosFrom(const string &strMutTreeCur, int posCur) {
-  //
-  int posCurGNTPF = posCur;
-  while (posCurGNTPF < (int)strMutTreeCur.length()) {
-    // printf "getNextTaxaPosFrom: %d: curr ch: %s\n", posCurGNTPF,
-    // substr(strMutTreeCur,posCurGNTPF,1);
-    if (strMutTreeCur[posCurGNTPF] == '#') {
-      break;
-    }
-    ++posCurGNTPF;
-  }
-  if (posCurGNTPF >= (int)strMutTreeCur.length()) {
-    posCurGNTPF = -1;
-  }
-  return posCurGNTPF;
-}
-
-static int getNextTaxaPosFromLevelUp(const string &strMutTreeCur, int posCur) {
-  int posCurGNTPF = posCur;
-  int level = 0;
-  bool fUpperOnly = false;
-  while (posCurGNTPF < (int)strMutTreeCur.length()) {
-    char chGNTPF = strMutTreeCur[posCurGNTPF];
-    if (chGNTPF == '#' && ((level >= 0 && fUpperOnly == false) || level > 0)) {
-      break;
-    }
-    if (chGNTPF == '(') {
-      --level;
-    } else if (chGNTPF == ')') {
-      ++level;
-    } else if (chGNTPF == ',') {
-      fUpperOnly = true;
-    }
-
-    ++posCurGNTPF;
-  }
-  if (posCurGNTPF >= (int)strMutTreeCur.length()) {
-    posCurGNTPF = -1;
-  }
-  return posCurGNTPF;
-}
-
-static string getTaxaAt(const string &strMutTreeCur, int posCur) {
-  int posGTA = posCur;
-  if (strMutTreeCur[posCur] == '#') {
-    posGTA = posCur + 1;
-  }
-  //  now find where it ends
-  int posGTA2 = posGTA;
-  while (posGTA2 < (int)strMutTreeCur.length()) {
-    char chGTA = strMutTreeCur[posGTA2];
-    if (chGTA == '#' || chGTA == ',' || chGTA == ')') {
-      break;
-    }
-    ++posGTA2;
-  }
-  if (posGTA2 > (int)strMutTreeCur.length()) {
-    posGTA2 = (int)strMutTreeCur.length() - 1;
-  }
-  return strMutTreeCur.substr(posGTA, posGTA2 - posGTA);
-}
-
-void PhylogenyTreeBasic ::ConsOnNewickEdgeLabelTree(const string &nwString) {
-  // view each edge label as taxon; a stand-alone edge label is the leaf;
-  // edge label may or may not have a leading seperator (# in this
-  // implementation); e.g. ((#1,#2#3)#4)  this give four node, one for each edge
-  // label
-  if (rootNode != NULL) {
-    delete rootNode;
-    rootNode = NULL;
-  }
-  // find all edge labels and how they are related
-  map<string, string> mapEdgeLabelPar;
-  int posEdgeLbl = 0;
-  while (posEdgeLbl < (int)nwString.length()) {
-    //
-    posEdgeLbl = GetEdgeLabelPosFrom(nwString, posEdgeLbl);
-    if (posEdgeLbl < 0) {
-      break;
-    }
-    string strTaxon = getTaxaAt(nwString, posEdgeLbl);
-    // find its parent
-    int posEdgeLblPar = getNextTaxaPosFromLevelUp(nwString, posEdgeLbl + 1);
-    string strPar;
-    if (posEdgeLblPar >= 0) {
-      //
-      strPar = getTaxaAt(nwString, posEdgeLblPar);
-    }
-    mapEdgeLabelPar[strTaxon] = strPar;
-    // cout << "Taxon: " << strTaxon << " is child of " << strPar << endl;
-    ++posEdgeLbl;
-  }
-  // now create nodes
-  int nidNext = 1;
-  this->rootNode = new TreeNode(nidNext++);
-  string strLblRoot = "-";
-  int posRootLbl = -1;
-  std::size_t pos1 = nwString.find_last_of(')');
-  std::size_t pos2 = nwString.find_last_of('#');
-  if (pos1 != string::npos && pos2 != string::npos) {
-    posRootLbl = max(pos1, pos2);
-  } else if (pos1 != string::npos) {
-    posRootLbl = pos1;
-  } else if (pos2 != string::npos) {
-    posRootLbl = pos2;
-  }
-  if (posRootLbl >= 0) {
-    strLblRoot = getTaxaAt(nwString, posRootLbl);
-  }
-
-  // cout << "root label: " << strLblRoot << endl;
-  // now create all descendents
-  map<string, TreeNode *> mapNodes;
-  mapNodes[strLblRoot] = this->rootNode;
-  while (true) {
-    // find direct descendents
-    TreeNode *pnPar = NULL;
-    string strChildUse;
-    for (map<string, string>::iterator it = mapEdgeLabelPar.begin();
-         it != mapEdgeLabelPar.end(); ++it) {
-      string strChild = it->first;
-      string strPar = it->second;
-      if (mapNodes.find(strChild) == mapNodes.end() &&
-          mapNodes.find(strPar) != mapNodes.end()) {
-        pnPar = mapNodes[strPar];
-        strChildUse = strChild;
-      }
-    }
-    if (pnPar == NULL) {
-      break;
-    }
-    TreeNode *pnode = new TreeNode(nidNext++);
-    pnode->SetLabel(strChildUse);
-    vector<int> listLblsDummy;
-    pnPar->AddChild(pnode, listLblsDummy);
-
-    mapNodes[strChildUse] = pnode;
-  }
-
-  if (strLblRoot.length() == 0) {
-    strLblRoot = "-";
-  }
-  this->rootNode->SetLabel(strLblRoot);
-}
-
-void PhylogenyTreeBasic ::InitPostorderWalk() {
-  // cout << "InitPostorderWalk() entry\n";
-  // when walk, return the value of the node if any
-  // Clearup the previous storage if any
-  while (stackPostorder.empty() == false) {
-    stackPostorder.pop();
-  }
-  // cout << "Nnow stack empty.\n";
-  // Now recurisvely store the order of the walk
-  if (rootNode != NULL) {
-    PostOrderPushStack(rootNode, stackPostorder);
-  }
-}
+    string strToUse = strPart;
+    size_t posSeparator = strPart.find(':');
 
-TreeNode *PhylogenyTreeBasic ::NextPostorderWalk() {
-  // Return false, when nothing to go any more
-  if (stackPostorder.empty() == true) {
-    return NULL;
-  }
-  TreeNode *pn = stackPostorder.top();
-  stackPostorder.pop();
+    if (posSeparator != string::npos)
+    {
+        strToUse = strPart.substr(0, (int)posSeparator);
+    }
 
-//    node = pn;
-#if 0
-    if( pn->nodeValues.size() > 0 )
+    // 05/07/15: it is also possible user add gene index (in # sign)
+    size_t posSeparator2 = strToUse.find('#');
+    if (posSeparator2 != string::npos)
     {
-        // There is valid node value stored here
-        nodeValue = pn->nodeValues[0];
+        strToUse = strToUse.substr(0, (int)posSeparator2);
     }
-    else
+    //cout << "strPart: " << strPart << ",strUse: " << strToUse << endl;
+
+    // get rid of
+    int res = -1;
+    if (pTMapper == NULL)
     {
-        nodeValue = -1;     // no node value is stored here
+        sscanf(strToUse.c_str(), "%d", &res);
+        //cout << "Empty mapper\n";
     }
-#endif
-  return pn;
-}
-
-void PhylogenyTreeBasic ::OutputGML(const char *inFileName) {
-  // Now output a file in GML format
-  // First create a new name
-  string name = inFileName;
-  // cout << "num edges = " << listEdges.size() << endl;
-
-  DEBUG("FileName=");
-  DEBUG(name);
-  DEBUG("\n");
-  // Now open file to write out
-  ofstream outFile(name.c_str());
-
-  // First output some header info
-  outFile << "graph [\n";
-  outFile << "comment ";
-  OutputQuotedString(outFile, "Automatically generated by Graphing tool");
-  outFile << "\ndirected  1\n";
-  outFile << "id  1\n";
-  outFile << "label ";
-  OutputQuotedString(outFile, "Phylogeny Tree....\n");
-
-  // Now output all the vertices
-  //	int i;
-  stack<TreeNode *> nodesStack;
-  if (rootNode != NULL) {
-    nodesStack.push(rootNode);
-  }
-  // cout << "a.1.1\n";
-  while (nodesStack.empty() == false) {
-    TreeNode *pn = nodesStack.top();
-    nodesStack.pop();
-
-    outFile << "node [\n";
-
-    outFile << "id " << pn->id << endl;
-    outFile << "label ";
-    string nameToUse = " ";
-    if (pn->GetLabel() != "-") {
-      nameToUse = pn->GetLabel();
+    else
+    {
+        // are we reading in the first tree or not
+        res = pTMapper->GetId(strToUse);
+        //if( pTMapper->IsInitialized() == true )
+        //{
+        //	res  = pTMapper->GetId(strToUse);
+        //cout << "GetIdFromStr: GetId: " << strToUse << ": " << res << endl;
+        //}
+        //else
+        if (res < 0)
+        {
+            // this label is not seen before, so we add a new record
+            // this is new
+            res = pTMapper->AddTaxaString(strToUse);
+            //cout << "GetIdFromStr: New id: " << strToUse << ": " << res << endl;
+        }
     }
+    return res;
+}
+
+void TaxaMapper ::GetAllTaxaIds(set<int> &taxaIndices) const
+{
+    // output: taxaIndices is reset and then filled with every id (key) stored in mapIdToStr
+    taxaIndices.clear();
+    for (map<int, string>::const_iterator it = mapIdToStr.begin(); it != mapIdToStr.end(); ++it)
+    {
+        taxaIndices.insert(it->first);
+    }
+}
+
+void TaxaMapper ::GetAllTaxaStrs(set<string> &setStrs) const
+{
+    // output: setStrs is reset and then filled with every taxa string (value) stored in mapIdToStr
+    setStrs.clear();
+    for (map<int, string>::const_iterator it = mapIdToStr.begin(); it != mapIdToStr.end(); ++it)
+    {
+        setStrs.insert(it->second);
+    }
+}
+
+void TaxaMapper ::InitToDec1Mode(int numTaxa)
+{
+    // assumes the taxa are named by the decimal strings "1", "2", ..., numTaxa
+    // registers them in that order via AddTaxaString; on a freshly constructed mapper this gives "1" ==> 0, "2" ==> 1 and so on
+    for (int taxa = 1; taxa <= numTaxa; ++taxa)
+    {
+        char buf[100];
+        sprintf(buf, "%d", taxa); // decimal text of an int, far shorter than the 100-char buffer
+        string strid = buf;
+        AddTaxaString(strid);
+    }
+    SetInitialized(true); // presumably sets fInit (defined outside this view) -- TODO confirm
+}
+
+void TaxaMapper ::Dump() const
+{
+    // debug aid: print curId, the init flag and every string ==> id pair to cout on one line. NOTE(review): no separator is printed between the curId value and the status text
+    cout << "curId = " << curId;
+    if (fInit == true)
+    {
+        cout << "initialized. ";
+    }
+    else
+    {
+        cout << "not initialized yet. ";
+    }
+    for (map<string, int>::const_iterator it = mapStrToId.begin(); it != mapStrToId.end(); ++it)
+    {
+        // entries are visited in mapStrToId's iteration order, i.e. ordered by taxa string rather than by id
+        cout << "Mapping taxa " << it->first << " to id: " << it->second << "  ";
+    }
+    cout << endl;
+}
+
+// ***************************************************************************
+// Tree class functions
+// ***************************************************************************
+TreeNode ::TreeNode() : parent(NULL), id(-1), label("-"), shape(PHY_TN_DEFAULT_SHAPE), lenBranchAbove(-1.0)
+{ // default node: no parent, id -1, placeholder label "-", default shape, negative branch length
+}
+
+TreeNode ::TreeNode(int iid) : parent(NULL), id(iid), label("-"), shape(PHY_TN_DEFAULT_SHAPE), lenBranchAbove(-1.0)
+{
+    // Same defaults as the no-argument constructor, except that the node id
+    // is taken from iid.
+}
+
+TreeNode ::~TreeNode()
+{
+    // A node owns its children: destroying it deletes every child, and each
+    // child's destructor does the same for its own children, so the whole
+    // subtree is released. Children that must survive have to be unlinked
+    // first (RemoveChild / DetachAllChildren drop them without deleting).
+    for (TreeNode *pChildNode : listChildren)
+    {
+        delete pChildNode;
+    }
+    listChildren.clear();
+}
+
+void TreeNode::Dump() const
+{
+    // Debug print to stdout (no newline): label, id, branch length (only when it is >= 0) and number of children.
+    cout << "<node: " << GetLabel() << ", id=" << GetID();
+    if (lenBranchAbove >= 0.0)
+    {
+        cout << ", length = " << lenBranchAbove;
+    }
+    cout << ", num of child = " << GetChildrenNum() << ">   ";
+}
+
+TreeNode *TreeNode ::Copy()
+{
+    // Deep copy: returns a newly allocated node duplicating this node (id, labels, branch length, node values) and its whole subtree.
+    TreeNode *pCopy = new TreeNode(GetID());
+    pCopy->SetLabel(this->GetLabel());
+    pCopy->SetUserLabel(this->GetUserLabel());
+    pCopy->lenBranchAbove = this->lenBranchAbove;
+    pCopy->nodeValues = this->nodeValues; // NOTE(review): shape is not copied, so the copy keeps the constructor default
+    for (int i = 0; i < GetChildrenNum(); ++i)
+    {
+        TreeNode *pccopy = GetChild(i)->Copy();
+        vector<int> listLbelsCopy;
+        if ((int)this->listEdgeLabels.size() >= i + 1) // child i may lack a label record; it then gets an empty label list
+        {
+            listLbelsCopy = this->listEdgeLabels[i];
+        }
+        pCopy->AddChild(pccopy, listLbelsCopy);
+    }
+    return pCopy;
+}
+
+void TreeNode ::AddChild(TreeNode *pChild, const vector<int> &labels)
+{
+    // Append pChild as the last child of this node; labels are the integer labels stored for the new edge.
+    YW_ASSERT(pChild != NULL);
+
+    // pChild is not checked against the current children, so adding the same node
+    // twice records it twice; its previous parent (if any) is not updated either.
+
+    pChild->parent = this;
+    listChildren.push_back(pChild);
+    listEdgeLabels.push_back(labels);
+}
+
+void TreeNode ::AddEdgeLabelToChild(int cIndex, int lbl) // append lbl to the labels of the edge to child number cIndex
+{
+    YW_ASSERT_INFO(cIndex < GetChildrenNum(), "Overflow"); // NOTE(review): a negative cIndex is not rejected here
+    this->listEdgeLabels[cIndex].push_back(lbl);
+}
+
+void TreeNode ::RemoveChild(TreeNode *pChild)
+{
+    // Unlink pChild (it is NOT deleted) together with its edge labels; its parent pointer is reset unconditionally.
+    YW_ASSERT_INFO(pChild != NULL, "RemoveChild: wrong");
+    pChild->parent = NULL;
+    YW_ASSERT_INFO(listEdgeLabels.size() >= listChildren.size(), "RemoveChild: child without edge labels"); // guards listEdgeLabels[i] below
+    vector<TreeNode *> listChildrenNew;
+    vector<vector<int>> listEdgeLabelsNew;
+    for (int i = 0; i < (int)listChildren.size(); ++i)
+    {
+        if (listChildren[i] != pChild)
+        {
+            listChildrenNew.push_back(listChildren[i]);
+            listEdgeLabelsNew.push_back(listEdgeLabels[i]);
+        }
+    }
+    listChildren = listChildrenNew;
+    listEdgeLabels = listEdgeLabelsNew;
+}
+
+void TreeNode ::RemoveAllChildren()
+{
+    // Unlink every child of this node; none of them is deleted.
+    // Each child goes through RemoveChild(), so (unlike DetachAllChildren)
+    // its parent pointer is reset to NULL as well.
+    while (GetChildrenNum() > 0)
+    {
+        TreeNode *pc = GetChild(0);
+        // RemoveChild() rebuilds the child list without pc, so the next
+        // child (if any) moves to index 0 and the loop ends once the
+        // list is empty.
+        RemoveChild(pc);
+    }
+    // here GetChildrenNum() is 0
+}
+
+void TreeNode ::DetachAllChildren()
+{
+    // Forget all children and their edge labels. Unlike RemoveAllChildren, the children are not touched: nothing is deleted and their parent pointers still refer to this node.
+    this->listChildren.clear();
+    this->listEdgeLabels.clear();
+}
+
+void TreeNode ::DetachSelf()
+{
+    // Unlink this node from its parent, if it has one. Nothing is deleted:
+    // the node and its subtree stay allocated.
+    TreeNode *pParentNode = GetParent();
+    if (pParentNode == NULL)
+    {
+        return;
+    }
+    pParentNode->RemoveChild(this);
+}
+
+void TreeNode ::GetDescendentLabelSet(set<int> &labelSet)
+{
+    // Add to labelSet the integer encoded in the label of this node and of every node
+    // below it. labelSet is never cleared here, so pass an empty set unless accumulating.
+    // Placeholder labels "-", "?", "()" and "(?)" contribute nothing; for any other label a
+    // leading non-digit character is skipped and the rest is parsed as a decimal integer.
+    string lbl = GetLabel();
+    // (nodes of every kind are inspected, not only leaves)
+
+    if (lbl != "-" && lbl != "?" && lbl != "()" && lbl != "(?)")
+    {
+        const char *buf = lbl.c_str();
+        int rowIndex = -1;
+        if (!lbl.empty() && (buf[0] < '0' || buf[0] > '9'))
+        {
+            // one-character prefix before the number: step over it (never past the terminator)
+            ++buf;
+        }
+        // Insert only what sscanf really parsed: an unparsable label used to leave rowIndex
+        // uninitialized, so an indeterminate value ended up in the set.
+        if (sscanf(buf, "%d", &rowIndex) == 1)
+        {
+            labelSet.insert(rowIndex);
+        }
+    }
+    else if (nodeValues.size() >= 1)
+    {
+        // intentionally empty: node values are not reported for placeholder labels
+        //labelSet.insert( nodeValues[0] );
+    }
+
+#if 0
+        // set every label into the set
+        for(int i=0; i<nodeValues.size(); ++i)
+        {
+            if( nodeValues[i] >= 0 )
+            {
+                labelSet.insert( nodeValues[i] );
+            }
+        }
+#endif
+    // Recurse into the children; a leaf has none, so nothing more happens there.
+    //
+    if (IsLeaf() == false)
+    {
+        for (int i = 0; i < GetChildrenNum(); ++i)
+        {
+            GetChild(i)->GetDescendentLabelSet(labelSet);
+        }
+    }
+}
+
+bool TreeNode ::IsAncesterOf(TreeNode *pAssumedDescend, int &branchIndex)
+{
+    // Return true when pAssumedDescend is this node or lies below it. On success branchIndex is the index of the child
+    // branch leading towards it (-1 when it is this node itself); when false is returned branchIndex is left untouched.
+    if (pAssumedDescend == NULL)
+    {
+        return false;
+    }
+    if (pAssumedDescend == this)
+    {
+        branchIndex = -1;
+        return true;
+    }
+
+    TreeNode *pCurrent = pAssumedDescend;
+    TreeNode *pParent = pAssumedDescend->parent;
+
+    while (pParent != NULL)
+    {
+        if (pParent == this)
+        {
+            // pCurrent is our child on the path down to pAssumedDescend: look up its index
+            branchIndex = -1;
+            for (int i = 0; i < (int)listChildren.size(); ++i)
+            {
+                if (listChildren[i] == pCurrent)
+                {
+                    branchIndex = i;
+                }
+            }
+            YW_ASSERT(branchIndex >= 0);
+            // this node is a proper ancestor of pAssumedDescend
+            return true;
+        }
+        pCurrent = pParent;
+        pParent = pParent->parent;
+    }
+
+    return false;
+}
+
+void TreeNode ::GetAllDescendents(set<TreeNode *> &setDescendents)
+{
+    // Add this node and every node below it to setDescendents (the set is not cleared first).
+    setDescendents.insert(this);
+    for (TreeNode *pChildNode : listChildren)
+    {
+        pChildNode->GetAllDescendents(setDescendents);
+    }
+}
+
+void TreeNode ::GetAllLeavesUnder(set<TreeNode *> &setDescendents)
+{
+    // Add every leaf of the subtree rooted here to setDescendents (this node itself when it is a leaf).
+    if (IsLeaf())
+    {
+        setDescendents.insert(this);
+    }
+    for (TreeNode *pChildNode : listChildren)
+    {
+        pChildNode->GetAllLeavesUnder(setDescendents);
+    }
+}
+
+void TreeNode ::GetAllLeavesIdUnder(set<int> &setDescendents)
+{
+    set<TreeNode *> setLeafNodes; // leaves of the subtree rooted here
+    GetAllLeavesUnder(setLeafNodes);
+    setDescendents.clear(); // the output is overwritten with the leaves' ids, not extended
+    for (TreeNode *pLeaf : setLeafNodes)
+    {
+        setDescendents.insert(pLeaf->GetID());
+    }
+}
+
+void TreeNode ::GetAllDescendIntLbls(set<int> &setIntLbs)
+{
+    // Add the integer label (GetIntLabel) of every leaf in the subtree rooted
+    // here to setIntLbs. Internal nodes add nothing themselves and the set is
+    // not cleared first.
+    if (IsLeaf())
+    {
+        setIntLbs.insert(GetIntLabel());
+        return;
+    }
+    for (TreeNode *pChildNode : listChildren)
+    {
+        pChildNode->GetAllDescendIntLbls(setIntLbs);
+    }
+}
+
+void TreeNode ::GetAllLeafLabeles(vector<string> &listLeafLabels)
+{
+    // Append the label (GetLabel) of every leaf in the subtree rooted here to
+    // listLeafLabels, children visited in order. The list is not cleared first
+    // and internal nodes add nothing themselves.
+    if (IsLeaf())
+    {
+        listLeafLabels.push_back(GetLabel());
+        return;
+    }
+    for (TreeNode *pChildNode : listChildren)
+    {
+        pChildNode->GetAllLeafLabeles(listLeafLabels);
+    }
+}
+void TreeNode ::GetAllLeafIntLabeles(vector<int> &listLeafLabels)
+{
+    // Append the integer label (GetIntLabel) of every leaf in the subtree rooted
+    // here to listLeafLabels, children visited in order. The list is not cleared
+    // first and internal nodes add nothing themselves.
+    if (IsLeaf())
+    {
+        listLeafLabels.push_back(GetIntLabel());
+        return;
+    }
+    for (TreeNode *pChildNode : listChildren)
+    {
+        pChildNode->GetAllLeafIntLabeles(listLeafLabels);
+    }
+}
+
+void TreeNode ::GetAllDistinctLeafLabeles(set<string> &setLeafLabels)
+{
+    // Collect the leaf labels below this node and hand them to PopulateSetByVecGen to fill setLeafLabels (presumably replacing its contents -- confirm in that helper).
+    vector<string> listLeafLabels;
+    GetAllLeafLabeles(listLeafLabels);
+    PopulateSetByVecGen(setLeafLabels, listLeafLabels);
+}
+
+string TreeNode ::GetShapeLabel(const set<int> &idTerms, map<int, int> &mapNodeLabel) const
+{
+    // Build a Newick-like "shape" string for the subtree rooted here.
+    // idTerms: ids of the nodes at which the description stops; mapNodeLabel: optional number to print for such a node.
+    string res;
+
+    // A node whose id is in idTerms is written as a decimal number: its entry in
+    // mapNodeLabel when there is one, otherwise 1. Any other node is written from its
+    // children's strings, sorted so that in (S1,S2) we have S1 <= S2.
+    if (idTerms.find(GetID()) != idTerms.end())
+    {
+        int idNum = 1;
+        if (mapNodeLabel.find(GetID()) != mapNodeLabel.end())
+        {
+            idNum = mapNodeLabel[GetID()];
+        }
+        char buf[100];
+        sprintf(buf, "%d", idNum);
+        res = buf;
+        // (mapNodeLabel is only indexed after the find above, so no entry
+        // is ever inserted into it here)
+    }
+    // Nodes outside idTerms: combine the children's strings.
+    // - children whose string is empty (no terminal node below them) are dropped
+    // - a single remaining string is passed up unchanged, without parentheses
+    // - two or more are joined with "," and wrapped in "(" ... ")"
+    // - a node outside idTerms with no non-empty child string (e.g. a leaf)
+    //   yields the empty string
+    else
+    {
+        // shape strings of all children
+        vector<string> listLabels;
+        for (int i = 0; i < (int)listChildren.size(); ++i)
+        {
+            listLabels.push_back(listChildren[i]->GetShapeLabel(idTerms, mapNodeLabel));
+        }
+        // sort them in increasing string order (simple exchange sort)
+        for (int i = 0; i < (int)listLabels.size(); ++i)
+        {
+            for (int j = i + 1; j < (int)listLabels.size(); ++j)
+            {
+                // swap if out of order
+                if (listLabels[i] > listLabels[j])
+                {
+                    string tmp = listLabels[i];
+                    listLabels[i] = listLabels[j];
+                    listLabels[j] = tmp;
+                }
+            }
+        }
+
+        // how many children contributed something?
+        int numNonEmpty = 0;
+        for (int i = 0; i < (int)listLabels.size(); ++i)
+        {
+            if (listLabels[i].length() > 0)
+            {
+                numNonEmpty++;
+            }
+        }
+
+        // concatenate the non-empty strings; fStart records that the first one was written
+        bool fStart = false;
+        for (vector<string>::iterator it = listLabels.begin(); it != listLabels.end(); ++it)
+        {
+            if (it->length() > 0)
+            {
+                if (fStart == false)
+                {
+                    if (numNonEmpty > 1)
+                    {
+                        // open the group only when there is more than one member
+                        res = "(";
+                    }
+                }
+                else
+                {
+                    res += ",";
+                }
+                res += *it;
+
+                fStart = true;
+            }
+        }
+        if (fStart == true && numNonEmpty > 1)
+        // close the group opened above
+        {
+            res += ")";
+        }
+    }
+    // res is empty when no node of idTerms lies in this subtree
+    return res;
+}
+
+// Different from the overload above: a terminal node is written as "1" (fSort) or as its own id (no fSort); no label map is used.
+string TreeNode ::GetShapeLabel(const set<int> &idTerms, bool fSort) const
+{
+    // Build a Newick-like "shape" string for the subtree rooted here.
+    // idTerms: ids of the nodes at which the description stops; fSort: sort sibling strings and hide the ids.
+    string res;
+
+    // A node whose id is in idTerms is written as "1" when fSort is set and as its
+    // decimal id otherwise. Any other node is written from its children's strings;
+    // with fSort these are sorted so that in (S1,S2) we have S1 <= S2.
+    if (idTerms.find(GetID()) != idTerms.end())
+    {
+        // terminal node
+        if (fSort == true)
+        {
+            res = "1";
+        }
+        else
+        {
+            char buf[100];
+            sprintf(buf, "%d", GetID());
+            res = buf;
+        }
+    }
+
+    else
+    {
+        // shape strings of all children
+        vector<string> listLabels;
+        for (int i = 0; i < (int)listChildren.size(); ++i)
+        {
+            listLabels.push_back(listChildren[i]->GetShapeLabel(idTerms, fSort));
+        }
+        // sort them in increasing string order, but only on request (simple exchange sort)
+        if (fSort == true)
+        {
+            for (int i = 0; i < (int)listLabels.size(); ++i)
+            {
+                for (int j = i + 1; j < (int)listLabels.size(); ++j)
+                {
+                    // swap if out of order
+                    if (listLabels[i] > listLabels[j])
+                    {
+                        string tmp = listLabels[i];
+                        listLabels[i] = listLabels[j];
+                        listLabels[j] = tmp;
+                    }
+                }
+            }
+        }
+
+        // count children with and without a contribution (numEmpty is not used afterwards)
+        int numNonEmpty = 0, numEmpty = 0;
+        for (int i = 0; i < (int)listLabels.size(); ++i)
+        {
+            if (listLabels[i].length() > 0)
+            {
+                numNonEmpty++;
+            }
+            else
+            {
+                numEmpty++;
+            }
+        }
+
+        // concatenate the non-empty strings
+        bool fStart = false;
+        // fStart: the first non-empty string has been written
+        bool fParenth = false;
+        // fParenth: a "(" was opened and must be closed at the end
+        for (vector<string>::iterator it = listLabels.begin(); it != listLabels.end(); ++it)
+        {
+            // YW: only add "(" if there are more than 1 non-empty below
+            if (fStart == false && it->length() > 0)
+            {
+                // first non-empty string: it starts the result
+                // (a single non-empty string is passed up unchanged,
+                // without parentheses)
+                // open a group when more members will follow
+                if (numNonEmpty > 1)
+                {
+                    res = "(";
+                    fParenth = true;
+                }
+                res += *it;
+                fStart = true;
+                // empty strings seen before this point were skipped
+            }
+            else if (fStart == true)
+            {
+                // YW: only add "," if there is something
+                if (it->length() > 0)
+                {
+                    res += ",";
+                }
+                // then the string itself (same condition as the separator above)
+                if (it->length() > 0)
+                {
+
+                    res += *it;
+                    // fStart is already true here
+                }
+                // YW: do not add anything if the branch is empty
+#if 0
+				else
+				{
+					// for empty branches, put a mark to it
+					// when there is something under it (that is shrink the entire subtree of unknown to a symbol -
+					if(numNonEmpty >= 1 && fSpaceAdded == false)
+					{
+						//
+						res += ",-";
+						fSpaceAdded = true;
+					}
+				}
+#endif
+            }
+        }
+        // close the group if one was opened
+        if (fParenth == true)
+        {
+            res += ")";
+        }
+    }
+    // res is empty when no node of idTerms lies in this subtree
+    return res;
+}
+
+string TreeNode::GetShapeLabelNodeBrNum(map<TreeNode *, pair<int, int>> &mapNodeNumBrannches, vector<int> &listOrderedLeaves)
+{
+    // Return a shape string for the subtree rooted here, built only from "(", ")", "," (every leaf is "()").
+    // mapNodeNumBrannches: node -> <number of branches out of the node, event id>; per the original note the
+    //   first value is negative when all branches have descendents (then no placeholder leaf is added below).
+    // listOrderedLeaves (output, overwritten): event ids gathered for the leaves of the returned string;
+    //   meant to tell how leaves match up when two shape strings are compared.
+    listOrderedLeaves.clear();
+    if (this->IsLeaf() == true)
+    {
+        YW_ASSERT_INFO(mapNodeNumBrannches.find(this) != mapNodeNumBrannches.end(), "Leaf: not in map");
+        // a leaf reports its own event id
+        listOrderedLeaves.push_back(mapNodeNumBrannches[this].second);
+        return string("()");
+    }
+    else
+    {
+        YW_ASSERT_INFO(mapNodeNumBrannches.find(this) != mapNodeNumBrannches.end(), "Fail to find222");
+        // start from the recorded branch count and subtract one per child handled recursively
+        int numBrWOChildRecur = mapNodeNumBrannches[this].first;
+        // multiset: keeps the child strings sorted while retaining duplicates
+        multiset<string> setDescStrings;
+        map<string, set<vector<int>>> mapStringToVecLeaves;
+        for (int i = 0; i < (int)GetChildrenNum(); ++i)
+        {
+            // only children that appear in the map take part
+            TreeNode *pnchild = GetChild(i);
+            // children absent from the map are ignored here
+            if (mapNodeNumBrannches.find(pnchild) != mapNodeNumBrannches.end())
+            {
+                // shape string and leaf list of this child's subtree
+                vector<int> listOrderedLeavesStep;
+                string str = pnchild->GetShapeLabelNodeBrNum(mapNodeNumBrannches, listOrderedLeavesStep);
+                setDescStrings.insert(str);
+                if (mapStringToVecLeaves.find(str) == mapStringToVecLeaves.end())
+                {
+                    // first time this string is seen
+                    set<vector<int>> ssint;
+                    mapStringToVecLeaves.insert(map<string, set<vector<int>>>::value_type(str, ssint));
+                }
+                mapStringToVecLeaves[str].insert(listOrderedLeavesStep);
+
+                // one branch accounted for
+                --numBrWOChildRecur;
+            }
+        }
+        // each remaining branch (if the count is still positive) becomes a placeholder leaf "()"
+        // carrying this node's own event id
+        for (int i = 0; i < numBrWOChildRecur; ++i)
+        {
+            string strLv = "()";
+            setDescStrings.insert(strLv);
+
+            // make sure the "()" entry exists
+            if (mapStringToVecLeaves.find(strLv) == mapStringToVecLeaves.end())
+            {
+                // first placeholder leaf
+                set<vector<int>> ssint;
+                mapStringToVecLeaves.insert(map<string, set<vector<int>>>::value_type(strLv, ssint));
+            }
+            vector<int> vec1;
+            vec1.push_back(mapNodeNumBrannches[this].second);
+            mapStringToVecLeaves[strLv].insert(vec1);
+        }
+        // NOTE(review): leaf vectors are kept in a std::set per string, so identical
+        // vectors collapse into one entry (e.g. several placeholder leaves of this
+        // node, which all carry the same single event id). listOrderedLeaves can
+        // then hold fewer entries than the returned string has leaves -- confirm
+        // whether that is intended.
+        //
+        // concatenate the sorted child strings; an internal node must have at least two of them
+        YW_ASSERT_INFO(setDescStrings.size() > 1, "Can not be empty2");
+        string res = "(";
+        for (multiset<string>::iterator it = setDescStrings.begin(); it != setDescStrings.end(); ++it)
+        {
+            if (it != setDescStrings.begin())
+            {
+                res += ",";
+            }
+            res += *it;
+        }
+        res += ")";
+
+        // assemble the leaf list by walking the strings in the same sorted order as used for res
+        for (map<string, set<vector<int>>>::iterator itg = mapStringToVecLeaves.begin(); itg != mapStringToVecLeaves.end(); ++itg)
+        {
+            for (set<vector<int>>::iterator itg2 = itg->second.begin(); itg2 != itg->second.end(); ++itg2)
+            {
+                // ConcatIntVec is defined elsewhere; presumably it appends *itg2
+                // to listOrderedLeaves -- confirm there
+                ConcatIntVec(listOrderedLeaves, *itg2);
+            }
+        }
+
+        return res;
+    }
+}
+
+int TreeNode ::GetLevel() const
+{
+    // Height of the subtree rooted here, counted in edges: 0 for a node
+    // without children, otherwise one more than the largest level among
+    // the children. Computed by plain recursion (simple rather than
+    // efficient).
+    int levelMax = 0;
+    for (const TreeNode *pChildNode : listChildren)
+    {
+        const int levelViaChild = pChildNode->GetLevel() + 1;
+        levelMax = (levelViaChild > levelMax) ? levelViaChild : levelMax;
+    }
+    return levelMax;
+}
+
+void TreeNode ::GetEdgeLabelsToChild(TreeNode *pChild, vector<int> &lbls)
+{
+    YW_ASSERT_INFO(listChildren.size() == listEdgeLabels.size(), "Child num and edge label num do not match");
+    lbls.clear(); // lbls is then fetched through GetEdgeLabelsAtBranch() for each branch that leads to pChild
+    for (int i = 0; i < (int)listChildren.size(); ++i)
+    {
+        if (listChildren[i] == pChild)
+        {
+            GetEdgeLabelsAtBranch(i, lbls);
+        }
+    }
+    // When pChild is not a child of this node no error is raised (the check is disabled); no branch is queried.
+}
+
+TreeNode *TreeNode ::GetMRCA(TreeNode *pOther)
+{
+    TreeNode *pRes = this; // candidate ancestor: starts here and moves up one parent at a time
+    int dummy;             // branch index reported by IsAncesterOf, not needed here
+    while (pRes != NULL && pRes->IsAncesterOf(pOther, dummy) == false)
+    {
+        pRes = pRes->GetParent();
+    }
+    YW_ASSERT_INFO(pRes != NULL, "Fail to find MRCA"); // no node on the path to the root covers pOther (e.g. pOther is NULL)
+    return pRes; // lowest node on that path that is pOther itself or an ancestor of it
+}
+
+int TreeNode ::GetNumEdgesToAncestor(TreeNode *pAssumedAncestor)
+{
+    // Number of edges on the path from this node up to pAssumedAncestor
+    // (0 when pAssumedAncestor is this node itself). Returns -1 when the
+    // walk passes the root without meeting pAssumedAncestor, which includes
+    // the case of a NULL pAssumedAncestor.
+    int numEdges = 0;
+    TreeNode *pWalk = this;
+    while (pWalk != NULL && pWalk != pAssumedAncestor)
+    {
+        pWalk = pWalk->GetParent();
+        ++numEdges;
+    }
+    if (pWalk == NULL)
+    {
+        return -1;
+    }
+    return numEdges;
+}
+
+void TreeNode ::GetSiblings(vector<TreeNode *> &listSibs)
+{
+    // Overwrite listSibs with the parent's other children, in child order (empty for a node without parent).
+    listSibs.clear();
+    TreeNode *pParentNode = GetParent();
+    if (pParentNode != NULL)
+    {
+        for (int idx = 0; idx < pParentNode->GetChildrenNum(); ++idx)
+        {
+            TreeNode *pCandidate = pParentNode->GetChild(idx);
+            if (pCandidate != this)
+            {
+                listSibs.push_back(pCandidate);
+            }
+        }
+    }
+}
+
+void TreeNode ::Order()
+{
+    // Put the whole subtree into a canonical child order: children are sorted by the multiset of leaf labels below them. A leaf needs no work.
+    if (IsLeaf() == true)
+    {
+        return;
+    }
+    // first order every child subtree
+    for (int i = 0; i < (int)listChildren.size(); ++i)
+    {
+        listChildren[i]->Order();
+    }
+
+    // sort key of child i: the multiset of all leaf labels in its subtree
+    vector<multiset<string>> listDescLeaves;
+    for (int i = 0; i < (int)listChildren.size(); ++i)
+    {
+        vector<string> vecLeafStrings;
+        listChildren[i]->GetAllLeafLabeles(vecLeafStrings);
+        multiset<string> setLeafStrings;
+        for (int j = 0; j < (int)vecLeafStrings.size(); ++j)
+        {
+            setLeafStrings.insert(vecLeafStrings[j]);
+        }
+        listDescLeaves.push_back(setLeafStrings);
+    }
+    // exchange sort on the keys; children, edge labels and keys are parallel lists and must move together
+    YW_ASSERT_INFO(listEdgeLabels.size() == listChildren.size(), "Same size must be");
+    for (int i = 0; i < (int)listChildren.size(); ++i)
+    {
+        for (int j = i + 1; j < (int)listChildren.size(); ++j)
+        {
+            // multisets compare lexicographically over their sorted elements
+            if (listDescLeaves[i] > listDescLeaves[j])
+            {
+                // exchange the child pointers
+                TreeNode *ptmp = listChildren[i];
+                listChildren[i] = listChildren[j];
+                listChildren[j] = ptmp;
+
+                vector<int> vtmp = listEdgeLabels[i];
+                listEdgeLabels[i] = listEdgeLabels[j];
+                listEdgeLabels[j] = vtmp;
+
+                // and the sort keys, so that index i keeps describing child i
+                multiset<string> stmp = listDescLeaves[i];
+                listDescLeaves[i] = listDescLeaves[j];
+                listDescLeaves[j] = stmp;
+            }
+        }
+    }
+}
+
+int TreeNode ::GetIntLabel() const
+{
+    int res = -1; // returned unchanged when the label does not start with a decimal integer
+    sscanf(label.c_str(), "%d", &res); // parses the leading integer of the label; trailing text is ignored
+    return res;
+}
+
+void TreeNode ::SetIntLabel(int lbl)
+{
+    // Store lbl as the node label, written in decimal exactly as "%d" would
+    // print it (e.g. -3 becomes "-3"). The label member is assigned
+    // directly; GetIntLabel() parses this form back.
+    label = std::to_string(lbl);
+}
+
+bool TreeNode ::IsMulfurcate()
+{
+    // True when this node, or any node below it, has more than two
+    // children. A leaf is never multifurcating. The search stops at the
+    // first such node found; children are examined in order and each one
+    // is searched recursively.
+    if (IsLeaf())
+    {
+        return false;
+    }
+    if (GetChildrenNum() > 2)
+    {
+        return true;
+    }
+    for (int idxChild = 0; idxChild < GetChildrenNum(); ++idxChild)
+    {
+        if (GetChild(idxChild)->IsMulfurcate())
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+TreeNode *TreeNode ::GetRoot() const
+{
+    // Follow the parent links upwards and return the topmost node (this node itself when it has no parent).
+    TreeNode *pTop = const_cast<TreeNode *>(this);
+    while (pTop->GetParent() != NULL)
+    {
+        pTop = pTop->GetParent();
+    }
+    YW_ASSERT_INFO(pTop != NULL, "Root is null");
+    return pTop;
+}
+
+void TreeNode ::GetAllAncestors(set<TreeNode *> &listAncestors)
+{
+    // Add every proper ancestor of this node (parent, grandparent, ... up to the root) to listAncestors.
+    for (TreeNode *pAbove = GetParent(); pAbove != NULL; pAbove = pAbove->GetParent())
+    {
+        listAncestors.insert(pAbove);
+    }
+}
+
+void TreeNode ::GetAllChildren(set<TreeNode *> &setChildren) const
+{
+    // Overwrite setChildren with the direct children of this node (no
+    // deeper descendants). Being a std::set, the result does not keep
+    // the child order.
+    setChildren.clear();
+    for (TreeNode *pChildNode : listChildren)
+    {
+        setChildren.insert(pChildNode);
+    }
+}
+
+int TreeNode ::GetChildIndex(TreeNode *pchild) const
+{
+    // Position of pchild among the children of this node (first match).
+    // Not finding it is an error reported through YW_ASSERT_INFO.
+    int idxFound = -1;
+    const int numKids = (int)listChildren.size();
+    for (int idx = 0; idx < numKids && idxFound < 0; ++idx)
+    {
+        if (listChildren[idx] == pchild)
+        {
+            idxFound = idx;
+        }
+    }
+    YW_ASSERT_INFO(idxFound >= 0, "Fail to find666");
+    return idxFound;
+}
+
+void TreeNode ::RemoveLabels()
+{
+    // Empty the label list of every edge in the subtree rooted here; the number of label records per node is kept.
+    for (vector<int> &edgeLabels : listEdgeLabels)
+    {
+        edgeLabels.clear();
+    }
+    for (int idx = 0; idx < GetChildrenNum(); ++idx)
+    {
+        // same treatment for each child subtree
+        GetChild(idx)->RemoveLabels();
+    }
+}
+
+void TreeNode ::RemoveLabelsPar()
+{
+    // Empty the labels of the edge between this node and its parent (they are stored in the parent). A node without parent is left alone.
+    TreeNode *ppar = GetParent();
+    if (ppar == NULL)
+    {
+        return;
+    }
+    int childIndex = ppar->GetChildIndex(this); // asserts when this node is not listed by its parent
+    YW_ASSERT_INFO(childIndex < (int)ppar->listEdgeLabels.size(), "Overflow");
+    ppar->listEdgeLabels[childIndex].clear();
+}
+
+void TreeNode ::IncEdgeLabelsBy(int offset, bool fSub)
+{
+    // Add offset to every edge label stored at this node; when fSub is set, do the same throughout the subtree.
+    for (vector<int> &edgeLabels : listEdgeLabels)
+    {
+        for (int &lblValue : edgeLabels)
+        {
+            lblValue += offset;
+        }
+    }
+    if (fSub)
+    {
+        for (TreeNode *pChildNode : listChildren)
+        {
+            pChildNode->IncEdgeLabelsBy(offset, fSub);
+        }
+    }
+}
+
+void TreeNode ::Binarize(int &idToUseNext)
+{
+    // Recursively make the subtree binary. A node with more than 2 children keeps its first child and hands the
+    // others to a new internal node (its id is idToUseNext, which is then incremented); recursion splits that node further.
+    if (GetChildrenNum() > 2)
+    {
+        // as before, the moved children are re-attached with empty edge labels
+        TreeNode *pnode = new TreeNode(idToUseNext++);
+        for (int i = 1; i < GetChildrenNum(); ++i)
+        {
+            vector<int> ss;
+            pnode->AddChild(GetChild(i), ss);
+        }
+        TreeNode *pn1 = GetChild(0);
+        this->listChildren.clear();
+        this->listChildren.push_back(pn1);
+        this->listEdgeLabels.resize(1); // keep only child 0's labels; the stale entries used to leave both lists out of sync
+        AddChild(pnode, vector<int>());
+    }
+
+    for (int i = 0; i < GetChildrenNum(); ++i)
+    {
+        // children (including a node created above) are processed the same way
+        GetChild(i)->Binarize(idToUseNext);
+    }
+}
+
+int TreeNode ::GetMaxIdWithinSubtree() const
+{
+    // Largest node id (GetID) found in the subtree rooted here, this
+    // node included. Every child subtree is searched recursively and
+    // the running maximum is kept; for a node without children the
+    // result is simply its own id.
+    int idMax = GetID();
+    for (const TreeNode *pChildNode : listChildren)
+    {
+        const int idBelow = pChildNode->GetMaxIdWithinSubtree();
+        if (idBelow > idMax)
+        {
+            idMax = idBelow;
+        }
+    }
+    return idMax;
+}
+
+int TreeNode ::GetNumNodesUnder(bool fInternalOnly, bool fAddNonBinary) const
+{
+    // Count the nodes of the subtree rooted here, this node included. fInternalOnly: count only non-leaf nodes.
+    // fAddNonBinary: described as "an internal node is considered to have multiple (hidden) nodes", but
+    // NOTE(review): it is only passed down the recursion and never changes the count.
+    int res = 0;
+    if (fInternalOnly == false || IsLeaf() == false)
+    {
+        res = 1;
+    }
+    // add the counts of all child subtrees
+    TreeNode *pn = const_cast<TreeNode *>(this);
+    for (int i = 0; i < GetChildrenNum(); ++i)
+    {
+        res += pn->GetChild(i)->GetNumNodesUnder(fInternalOnly, fAddNonBinary);
+    }
+    return res;
+}
+
+// ***************************************************************************
+// Utility functions
+// ***************************************************************************
+
+void PhylogenyTreeIteratorBacktrack ::Init()
+{
+    while (stackNodesToExplore.empty() == false)
+    {
+        stackNodesToExplore.pop();
+    }
+    // The stack is empty now: (re)start the walk at the root of phyTree.
+    // With a NULL root the stack stays empty and the iterator is done immediately.
+    TreeNode *rootNode = phyTree.GetRoot();
+    if (rootNode != NULL)
+    {
+        stackNodesToExplore.push(rootNode);
+    }
+}
+
+void PhylogenyTreeIteratorBacktrack ::Next()
+{
+    if (stackNodesToExplore.empty() == true)
+    {
+        return;
+    }
+    TreeNode *pn = stackNodesToExplore.top();
+    // step into the current node: replace it on the stack by its children
+    stackNodesToExplore.pop();
+    for (int i = 0; i < (int)pn->GetChildrenNum(); ++i)
+    {
+        // pushed in child order, so the last child is visited next
+        stackNodesToExplore.push(pn->GetChild(i));
+    }
+}
+void PhylogenyTreeIteratorBacktrack ::Back()
+{
+    if (stackNodesToExplore.empty() == true)
+    {
+        return;
+    }
+    // drop the current node without pushing its children, i.e. skip the subtree below it
+    stackNodesToExplore.pop();
+}
+
+bool PhylogenyTreeIteratorBacktrack ::IsDone()
+{
+    return stackNodesToExplore.empty(); // done once no node is left to explore
+}
+
+TreeNode *PhylogenyTreeIteratorBacktrack ::GetCurrNode()
+{
+    // Current node of the walk: the top of the exploration stack,
+    // or NULL once the iterator is done. The stack itself is not
+    // modified.
+    if (IsDone())
+    {
+        return NULL;
+    }
+    return stackNodesToExplore.top();
+}
+
+///////////////////////////////////////////////////////////////////
+void PhylogenyTreeIterator ::Init()
+{
+    while (stackPostorder.empty() == false)
+    {
+        stackPostorder.pop();
+    }
+    // The stack is empty now: store the whole visiting order up front by pushing every node
+    // of phyTree via PostOrderPushStack. With a NULL root the iterator is done immediately.
+    TreeNode *rootNode = phyTree.GetRoot();
+    if (rootNode != NULL)
+    {
+        phyTree.PostOrderPushStack(rootNode, stackPostorder);
+    }
+}
+
+void PhylogenyTreeIterator ::Next()
+{
+    if (stackPostorder.empty() == true)
+    {
+        return;
+    }
+    // advance: discard the current node; the one pushed before it becomes current
+    stackPostorder.pop();
+}
+
+bool PhylogenyTreeIterator ::IsDone()
+{
+    return stackPostorder.empty(); // done once every stored node has been popped
+}
+
+TreeNode *PhylogenyTreeIterator ::GetCurrNode()
+{
+    // Current node of the walk: the top of the stored visiting
+    // order, or NULL once the iterator is done. The stack itself
+    // is not modified.
+    if (IsDone())
+    {
+        return NULL;
+    }
+    return stackPostorder.top();
+}
+
+// ***************************************************************************
+// Main functions
+// ***************************************************************************
+
+PhylogenyTreeBasic ::PhylogenyTreeBasic() : rootNode(NULL), numLeaves(-1)
+{ // empty tree: no root yet, and numLeaves starts at -1
+}
+
+PhylogenyTreeBasic ::~PhylogenyTreeBasic()
+{
+    // The tree owns its nodes.
+    // Deleting the root releases all of them, because ~TreeNode() deletes its children recursively.
+
+    // An empty tree has nothing to release.
+    if (rootNode != NULL)
+    {
+        delete rootNode;
+        rootNode = NULL;
+    }
+}
+
+PhylogenyTreeBasic *PhylogenyTreeBasic ::Copy() // deep copy: returns a newly allocated tree with its own copy of every node
+{
+    PhylogenyTreeBasic *pCopy = new PhylogenyTreeBasic;
+    pCopy->numLeaves = this->numLeaves; // was "pCopy->numLeaves = pCopy->numLeaves", which left the copy at the constructor's -1
+    pCopy->SetRoot(this->GetRoot()->Copy()); // NOTE(review): dereferences the root, so an empty tree cannot be copied
+    return pCopy;
+}
+
+void PhylogenyTreeBasic ::PostOrderPushStack(TreeNode *treeNode, stack<TreeNode *> &stackPostorder)
+{
+    // Push treeNode, then its child subtrees in order. A node goes in before its
+    // descendants, so popping the stack yields every node after all nodes below it.
+    stackPostorder.push(treeNode);
+    for (TreeNode *pChildNode : treeNode->listChildren)
+    {
+        PostOrderPushStack(pChildNode, stackPostorder);
+    }
+}
+
+void PhylogenyTreeBasic ::ConsOnNewick(const string &nwString, int numLeaves, bool fBottomUp, TaxaMapper *pTMapper)
+{
+    // (Re)build this tree from the Newick string nwString.
+    // Any tree held so far is deleted first; the nodes are then created and linked
+    // by ConsOnNewickSubtree, to which fBottomUp and pTMapper are passed unchanged.
+    if (rootNode != NULL)
+    {
+        delete rootNode;
+        rootNode = NULL;
+    }
+
+    // starting id counters for the recursive construction (invId is presumably the first id given to internal nodes -- confirm in ConsOnNewickSubtree)
+    int invId = 1000000;
+    if (numLeaves > 0)
+    {
+        // leaf count known: leaf ids are assumed to start from 0, so invId begins right after them
+        invId = numLeaves;
+    }
+    int leafId = 0;
+    rootNode = ConsOnNewickSubtree(nwString, leafId, invId, numLeaves, fBottomUp, pTMapper);
+}
+
+void PhylogenyTreeBasic ::ConsOnNewickDupLabels(const string &nwString, TaxaMapper *pTMapper)
+{
+    // (Re)build this tree from the Newick string nwString using ConsOnNewickSubtreeDupLabels
+    // (by its name, the variant that tolerates repeated leaf labels -- confirm there).
+    // Any tree held so far is deleted first.
+    if (rootNode != NULL)
+    {
+        delete rootNode;
+        rootNode = NULL;
+    }
+
+    // the number of leaves is taken from the string itself
+    int numLeaves = GetNewickNumLeaves(nwString);
+    // leaves are counted from 0, so invId starts right after the leaf ids
+    int invId = numLeaves;
+    int leafId = 0;
+    // NOTE(review): invId is passed before leafId here, the reverse of the ConsOnNewickSubtree call in ConsOnNewick -- confirm against the declaration
+    rootNode = ConsOnNewickSubtreeDupLabels(nwString, invId, leafId, pTMapper);
+}
+
+// ********************************************************************************
+// Utilities for constructing edge label trees
+
+static int GetEdgeLabelPosFrom(const string &strMutTreeCur, int posCur)
+{
+    // Return the index of the first '#' (edge-label marker) found at or
+    // after posCur, or -1 when there is none.
+    const int lenStr = (int)strMutTreeCur.length();
+    int posFound = -1;
+
+    for (int pos = posCur; pos < lenStr; ++pos)
+    {
+        if (strMutTreeCur[pos] == '#')
+        {
+            posFound = pos;
+            break;
+        }
+    }
+
+    // posFound is still -1 if the scan ran off the end of the string.
+    return posFound;
+}
+
+static int getNextTaxaPosFromLevelUp(const string &strMutTreeCur, int posCur)
+{
+    // Scan forward from posCur for a '#' that qualifies as the next label
+    // one level up. Returns its index, or -1 when no such '#' exists.
+    //
+    // A '#' is accepted when either
+    //   - more ')' than '(' have been passed since posCur, or
+    //   - the two counts are equal and no ',' has been passed yet.
+    const int lenStr = (int)strMutTreeCur.length();
+    int numNetClosed = 0; // +1 for every ')', -1 for every '('
+    bool fSeenComma = false;
+
+    for (int pos = posCur; pos < lenStr; ++pos)
+    {
+        switch (strMutTreeCur[pos])
+        {
+        case '#':
+            if (numNetClosed > 0 || (numNetClosed == 0 && fSeenComma == false))
+                return pos;
+            break;
+        case '(':
+            --numNetClosed;
+            break;
+        case ')':
+            ++numNetClosed;
+            break;
+        case ',':
+            fSeenComma = true;
+            break;
+        }
+    }
+    return -1;
+}
+
+static string getTaxaAt(const string &strMutTreeCur, int posCur)
+{
+    // Return the label text that starts at posCur.
+    //
+    // A leading '#' at posCur is skipped. The label then runs up to, but not
+    // including, the next '#', ',' or ')', or to the end of the string when
+    // none of those follows. The result may be empty.
+    //
+    // The terminator search is delegated to find_first_of; a hand-written
+    // scan would need no extra clamping afterwards either, because such a
+    // scan can never move past the end of the string.
+    size_t posBegin = (size_t)posCur;
+    if (strMutTreeCur[posCur] == '#')
+    {
+        ++posBegin;
+    }
+
+    const size_t posEnd = strMutTreeCur.find_first_of("#,)", posBegin);
+    if (posEnd == string::npos)
+    {
+        return strMutTreeCur.substr(posBegin);
+    }
+    return strMutTreeCur.substr(posBegin, posEnd - posBegin);
+}
+
+void PhylogenyTreeBasic ::ConsOnNewickEdgeLabelTree(const string &nwString)
+{
+    // Rebuild this tree from nwString, viewing each edge label as a taxon; a stand-alone edge label is a leaf;
+    // an edge label may or may not have a leading separator ('#' in this implementation);
+    // e.g. ((#1,#2#3)#4)  this gives four nodes, one for each edge label
+    if (rootNode != NULL)
+    {
+        delete rootNode;
+        rootNode = NULL;
+    }
+    // Pass 1: map each edge label to the label of its parent (empty string when no parent is found)
+    map<string, string> mapEdgeLabelPar;
+    int posEdgeLbl = 0;
+    while (posEdgeLbl < (int)nwString.length())
+    {
+        // next '#' at or after the cursor; a negative result means there are no more labels
+        posEdgeLbl = GetEdgeLabelPosFrom(nwString, posEdgeLbl);
+        if (posEdgeLbl < 0)
+        {
+            break;
+        }
+        string strTaxon = getTaxaAt(nwString, posEdgeLbl);
+        // find its parent: the search starts just after this label's '#'
+        int posEdgeLblPar = getNextTaxaPosFromLevelUp(nwString, posEdgeLbl + 1);
+        string strPar;
+        if (posEdgeLblPar >= 0)
+        {
+            // parent label text; strPar stays empty when no parent position exists
+            strPar = getTaxaAt(nwString, posEdgeLblPar);
+        }
+        mapEdgeLabelPar[strTaxon] = strPar;
+        // step past this '#' so that the next search finds the following label
+        ++posEdgeLbl;
+    }
+    // Pass 2: create the root node; node ids are handed out from 1
+    int nidNext = 1;
+    this->rootNode = new TreeNode(nidNext++);
+    string strLblRoot = "-";
+    int posRootLbl = -1;
+    std::size_t pos1 = nwString.find_last_of(')');
+    std::size_t pos2 = nwString.find_last_of('#');
+    if (pos1 != string::npos && pos2 != string::npos)
+    {
+        posRootLbl = max(pos1, pos2);
+    }
+    else if (pos1 != string::npos)
+    {
+        posRootLbl = pos1;
+    }
+    else if (pos2 != string::npos)
+    {
+        posRootLbl = pos2;
+    }
+    if (posRootLbl >= 0)
+    {
+        strLblRoot = getTaxaAt(nwString, posRootLbl);
+    }
+
+    // strLblRoot is the text at the last ')' or '#', whichever comes later (empty when that is a ')')
+    // now create all descendants, one node per pass of the loop below
+    map<string, TreeNode *> mapNodes;
+    mapNodes[strLblRoot] = this->rootNode;
+    while (true)
+    {
+        // find a label that has no node yet but whose parent has one; the last match in map order is used
+        TreeNode *pnPar = NULL;
+        string strChildUse;
+        for (map<string, string>::iterator it = mapEdgeLabelPar.begin(); it != mapEdgeLabelPar.end(); ++it)
+        {
+            string strChild = it->first;
+            string strPar = it->second;
+            if (mapNodes.find(strChild) == mapNodes.end() && mapNodes.find(strPar) != mapNodes.end())
+            {
+                pnPar = mapNodes[strPar];
+                strChildUse = strChild;
+            }
+        }
+        if (pnPar == NULL)
+        {
+            break;
+        }
+        TreeNode *pnode = new TreeNode(nidNext++);
+        pnode->SetLabel(strChildUse);
+        vector<int> listLblsDummy;
+        pnPar->AddChild(pnode, listLblsDummy);
+
+        mapNodes[strChildUse] = pnode;
+    }
+
+    if (strLblRoot.length() == 0)
+    {
+        strLblRoot = "-";
+    }
+    this->rootNode->SetLabel(strLblRoot);
+}
+
+void PhylogenyTreeBasic ::InitPostorderWalk()
+{
+    // Prepare a fresh traversal: empty the traversal stack, then refill it
+    // from the root so NextPostorderWalk() can hand nodes out one at a time.
+
+    // Throw away anything left over from a previous walk.
+    while (!stackPostorder.empty())
+    {
+        stackPostorder.pop();
+    }
+
+    // An empty tree simply leaves the stack empty.
+    if (rootNode == NULL)
+        return;
+    PostOrderPushStack(rootNode, stackPostorder);
+}
+
+TreeNode *PhylogenyTreeBasic ::NextPostorderWalk()
+{
+    // Hand out the next node from the traversal stack (top first).
+    // Returns NULL once the stack is empty.
+    TreeNode *pnNext = NULL;
+    if (!stackPostorder.empty())
+    {
+        pnNext = stackPostorder.top();
+        stackPostorder.pop();
+    }
+
+// Disabled legacy code: it also reported the first value stored at the node.
+#if 0
+    if( pn->nodeValues.size() > 0 )
+    {
+        // There is valid node value stored here
+        nodeValue = pn->nodeValues[0];
+    }
+    else
+    {
+        nodeValue = -1;     // no node value is stored here
+    }
+#endif
+    return pnNext;
+}
+
+void PhylogenyTreeBasic ::OutputGML(const char *inFileName)
+{
+    // Write this tree to the file inFileName in GML format: a graph header,
+    // one "node [" entry per tree node, then one "edge [" entry per parent-child link.
+    string name = inFileName;
+    // NOTE(review): the output stream is never checked for a failed open -- confirm callers pass a writable path
+
+    DEBUG("FileName=");
+    DEBUG(name);
+    DEBUG("\n");
+    // Now open file to write out
+    ofstream outFile(name.c_str());
+
+    // First output some header info
+    outFile << "graph [\n";
+    outFile << "comment ";
+    OutputQuotedString(outFile, "Automatically generated by Graphing tool");
+    outFile << "\ndirected  1\n";
+    outFile << "id  1\n";
+    outFile << "label ";
+    OutputQuotedString(outFile, "Phylogeny Tree....\n");
+
+    // Now output all the vertices,
+    // walking the tree with an explicit stack seeded with the root
+    stack<TreeNode *> nodesStack;
+    if (rootNode != NULL)
+    {
+        nodesStack.push(rootNode);
+    }
+    // an empty tree produces no node entries
+    while (nodesStack.empty() == false)
+    {
+        TreeNode *pn = nodesStack.top();
+        nodesStack.pop();
+
+        outFile << "node [\n";
+
+        outFile << "id " << pn->id << endl;
+        outFile << "label ";
+        string nameToUse = " ";
+        if (pn->GetLabel() != "-")
+        {
+            nameToUse = pn->GetLabel();
+        }
+#if 0
+        else
+        {
+            // we take the nonde value here
+            char buf[100];
+            if( pn->nodeValues.size() > 0 )
+            {
+                sprintf(buf, "(%d)", pn->nodeValues[0] );        // CAUTION, here we assume each leaf has exactly 1 label
+                nameToUse = buf;
+            }
+            else
+            {
+                // if no nodes value is set, still use label
+         //       nameToUse = pn->GetLabel();
+
+                // YW: TBD change
+                nameToUse.empty();
+            }
+        }
+#endif
+        const char *name = nameToUse.c_str();
+
+        // The name written below is the node label, or a single blank when
+        // the label is "-".
+        //
+        // An earlier naming scheme, kept here only as a description:
+        //   - a non-leaf node was named 'v' followed by its id
+        //   - a leaf was named after its first node value (row number)
+        // CAUTION: that scheme assumed each leaf has exactly 1 label.
+        //
+        // NOTE: this local `name` (a C string) shadows the file-name
+        // string `name` declared at the top of the function.
+        //
+        OutputQuotedString(outFile, name);
+        outFile << endl;
+
+        // See if we need special shape here
+        if (pn->GetShape() == PHY_TN_RECTANGLE)
+        {
+            outFile << "vgj [ \n shape  ";
+            OutputQuotedString(outFile, "Rectangle");
+            outFile << "\n]\n";
+        }
+        else
+        {
+            outFile << "defaultAtrribute   1\n";
+        }
+
+        outFile << "]\n";
+
+        // Now try to get more nodes
+        for (int i = 0; i < (int)pn->listChildren.size(); ++i)
+        {
+            nodesStack.push(pn->listChildren[i]);
+        }
+        // the children just pushed are written on later iterations
+    }
+    // all node entries are written at this point
+
+    // Now output all the edges, by again starting from root and output all nodes
+    YW_ASSERT(nodesStack.empty() == true);
+    if (rootNode != NULL)
+    {
+        nodesStack.push(rootNode);
+    }
+    while (nodesStack.empty() == false)
+    {
+        TreeNode *pn = nodesStack.top();
+        nodesStack.pop();
+
+        for (int i = 0; i < (int)pn->listChildren.size(); ++i)
+        {
+
+            // one edge entry per child: source is this node, target is the child
+            outFile << "edge [\n";
+            outFile << "source " << pn->id << endl;
+            outFile << "target  " << pn->listChildren[i]->id << endl;
+            outFile << "label ";
+            if (pn->listEdgeLabels[i].size() > 0)
+            {
+                string lblName;
+                char name[100];
+                // each edge label is rendered as 'e', the number, then two blanks
+                for (int iel = 0; iel < (int)pn->listEdgeLabels[i].size(); ++iel)
+                {
+                    sprintf(name, "e%d  ", pn->listEdgeLabels[i][iel]);
+                    lblName += name;
+                }
+                OutputQuotedString(outFile, lblName.c_str());
+            }
+            else
+            {
+                OutputQuotedString(outFile, "");
+            }
+            outFile << "\n";
+            outFile << "]\n";
+
+            // Store next one to stack
+            nodesStack.push(pn->listChildren[i]);
+        }
+    }
+
+    // Finally close the graph block and then the file
+    outFile << "\n]\n";
+    outFile.close();
+}
+
+void PhylogenyTreeBasic ::OutputGMLNoLabel(const char *inFileName)
+{
+    // Write this tree to the file inFileName in GML format, like OutputGML,
+    // except that every node is written with a single blank as its label.
+    // Edge labels are still written.
+    string name = inFileName;
+    // NOTE(review): the output stream is never checked for a failed open -- confirm callers pass a writable path
+
+    DEBUG("FileName=");
+    DEBUG(name);
+    DEBUG("\n");
+    // Now open file to write out
+    ofstream outFile(name.c_str());
+
+    // First output some header info
+    outFile << "graph [\n";
+    outFile << "comment ";
+    OutputQuotedString(outFile, "Automatically generated by Graphing tool");
+    outFile << "\ndirected  1\n";
+    outFile << "id  1\n";
+    outFile << "label ";
+    OutputQuotedString(outFile, "Phylogeny Tree....\n");
+
+    // Now output all the vertices,
+    // walking the tree with an explicit stack seeded with the root
+    stack<TreeNode *> nodesStack;
+    if (rootNode != NULL)
+    {
+        nodesStack.push(rootNode);
+    }
+    // an empty tree produces no node entries
+    while (nodesStack.empty() == false)
+    {
+        TreeNode *pn = nodesStack.top();
+        nodesStack.pop();
+
+        outFile << "node [\n";
+
+        outFile << "id " << pn->id << endl;
+        outFile << "label ";
+        string nameToUse = " ";
+        const char *name = nameToUse.c_str();
+
+        // The name written below is always a single blank.
+        //
+        // An earlier naming scheme, kept here only as a description:
+        //   - a non-leaf node was named 'v' followed by its id
+        //   - a leaf was named after its first node value (row number)
+        // CAUTION: that scheme assumed each leaf has exactly 1 label.
+        //
+        // NOTE: this local `name` (a C string) shadows the file-name
+        // string `name` declared at the top of the function.
+        //
+        //
+        OutputQuotedString(outFile, name);
+        outFile << endl;
+
+        // See if we need special shape here
+        if (pn->GetShape() == PHY_TN_RECTANGLE)
+        {
+            outFile << "vgj [ \n shape  ";
+            OutputQuotedString(outFile, "Rectangle");
+            outFile << "\n]\n";
+        }
+        else
+        {
+            outFile << "defaultAtrribute   1\n";
+        }
+
+        outFile << "]\n";
+
+        // Now try to get more nodes
+        for (int i = 0; i < (int)pn->listChildren.size(); ++i)
+        {
+            nodesStack.push(pn->listChildren[i]);
+        }
+        // the children just pushed are written on later iterations
+    }
+    // all node entries are written at this point
+
+    // Now output all the edges, by again starting from root and output all nodes
+    YW_ASSERT(nodesStack.empty() == true);
+    if (rootNode != NULL)
+    {
+        nodesStack.push(rootNode);
+    }
+    while (nodesStack.empty() == false)
+    {
+        TreeNode *pn = nodesStack.top();
+        nodesStack.pop();
+
+        for (int i = 0; i < (int)pn->listChildren.size(); ++i)
+        {
+
+            // one edge entry per child: source is this node, target is the child
+            outFile << "edge [\n";
+            outFile << "source " << pn->id << endl;
+            outFile << "target  " << pn->listChildren[i]->id << endl;
+            outFile << "label ";
+            if (pn->listEdgeLabels[i].size() > 0)
+            {
+                string lblName;
+                char name[100];
+                // each edge label is rendered as 'e', the number, then two blanks
+                for (int iel = 0; iel < (int)pn->listEdgeLabels[i].size(); ++iel)
+                {
+                    sprintf(name, "e%d  ", pn->listEdgeLabels[i][iel]);
+                    lblName += name;
+                }
+                OutputQuotedString(outFile, lblName.c_str());
+            }
+            else
+            {
+                OutputQuotedString(outFile, "");
+            }
+            outFile << "\n";
+            outFile << "]\n";
+
+            // Store next one to stack
+            nodesStack.push(pn->listChildren[i]);
+        }
+    }
+
+    // Finally close the graph block and then the file
+    outFile << "\n]\n";
+    outFile.close();
+}
+
+// construct a newick string for this tree
+void PhylogenyTreeBasic ::ConsNewick(string &strNewick, bool wGridLen, double gridWidth, bool fUseCurLbl)
+{
+    // Build the Newick text of the whole tree into strNewick (children are not sorted).
+    strNewick.clear(); // clear() resets the output; empty() only queries and its answer was discarded
+    // work from the root node, which must exist
+    YW_ASSERT_INFO(rootNode != NULL, "Root is not set");
+    strNewick = ConsNewickTreeNode(rootNode, wGridLen, gridWidth, fUseCurLbl, false);
+}
+
+void PhylogenyTreeBasic ::ConsNewickSorted(string &strNewick, bool wGridLen, double gridWidth, bool fUseCurLbl)
+{
+    // Build the Newick text of the whole tree into strNewick, with the sort flag of ConsNewickTreeNode set.
+    strNewick.clear(); // clear() resets the output; empty() only queries and its answer was discarded
+    // work from the root node, which must exist
+    YW_ASSERT_INFO(rootNode != NULL, "Root is not set");
+    strNewick = ConsNewickTreeNode(rootNode, wGridLen, gridWidth, fUseCurLbl, true);
+}
+
+void PhylogenyTreeBasic ::ConsNewickEdgeLabel(string &strNewick)
+{
+    // Build the Newick text of the whole tree into strNewick with edge labels: no grid length, current labels, sorted.
+    strNewick.clear(); // clear() resets the output; empty() only queries and its answer was discarded
+    // work from the root node, which must exist
+    YW_ASSERT_INFO(rootNode != NULL, "Root is not set");
+    strNewick = ConsNewickTreeNode(rootNode, false, 1.0, true, true, true);
+}
+
+string PhylogenyTreeBasic ::ConsNewickTreeNode(TreeNode *pNode, bool wGridLen, double gridWidth, bool fUseCurLbl, bool fSort, bool fEdgeLbel)
+{
+    // Recursively build and return the Newick text for the subtree rooted at pNode.
+    string resNodeStr;
+    // Is this node a leaf? If so, we output the label of it
+    if (pNode->IsLeaf() == true)
+    {
+        // leaf text: the user label, or the current label when fUseCurLbl is set
+        string tmpstr = pNode->GetUserLabel();
+        if (fUseCurLbl == true)
+        {
+            tmpstr = pNode->GetLabel();
+        }
+        resNodeStr = tmpstr;
+    }
+    else
+    {
+        string tmpstr = pNode->GetLabel();
+        YW_ASSERT_INFO(pNode->listChildren.size() >= 1, "Must have some children here.");
+
+        // When there is only one child and no self-label, the node is skipped and its child is output instead
+        if (tmpstr.size() <= 2 && pNode->listChildren.size() == 1)
+        {
+            resNodeStr = ConsNewickTreeNode(pNode->listChildren[0], wGridLen, gridWidth, fUseCurLbl, fSort, fEdgeLbel);
+        }
+        else
+        {
+
+            // Otherwise, we simply collect all sub strings here, and separate them by a ,
+            string comboStrName = "(";
+
+            bool fAddSep = false;
+            // does this node have a label by itself? if so, output it
+            if (tmpstr.size() > 2)
+            {
+                comboStrName += tmpstr.substr(1, tmpstr.size() - 2);
+                // the first and last characters of the label are dropped (presumably enclosing delimiters -- confirm)
+
+                // all others should be added sep.
+                fAddSep = true;
+            }
+
+            // handle its children
+            if (fSort == false)
+            {
+                for (unsigned int i = 0; i < pNode->listChildren.size(); ++i)
+                {
+                    string stepRes = ConsNewickTreeNode(pNode->listChildren[i], wGridLen, gridWidth, fUseCurLbl, fSort, fEdgeLbel);
+
+                    if (stepRes.size() > 0)
+                    {
+                        if (fAddSep == true)
+                        {
+                            comboStrName += ",";
+                        }
+
+                        comboStrName += stepRes;
+
+                        // from now on, add sep
+                        fAddSep = true;
+
+                        //if( i+1 < pNode->listChildren.size() )
+                        //{
+                        //    comboStrName += ",";
+                        //}
+                    }
+                }
+            }
+            else
+            {
+                // sort the labels from children
+                multiset<string> strsChildren;
+                for (unsigned int i = 0; i < pNode->listChildren.size(); ++i)
+                {
+                    string stepRes = ConsNewickTreeNode(pNode->listChildren[i], wGridLen, gridWidth, fUseCurLbl, fSort, fEdgeLbel);
+                    if (stepRes.size() > 0)
+                    {
+                        strsChildren.insert(stepRes);
+                    }
+                }
+                for (multiset<string>::iterator it = strsChildren.begin(); it != strsChildren.end(); ++it)
+                {
+                    // children texts come out of the multiset in ascending string order
+                    if (fAddSep == true)
+                    {
+                        comboStrName += ",";
+                    }
+
+                    comboStrName += *it;
+
+                    // from now on, add sep
+                    fAddSep = true;
+                }
+            }
+            comboStrName += ")";
+            // the bracketed list becomes this node's text
+            resNodeStr = comboStrName;
+        }
+    }
+
+    // now see if we need to add length info:
+    // with wGridLen the length is gridWidth times the level difference to the parent
+    if (wGridLen == true)
+    {
+        // the root has no parent and therefore gets no length
+        TreeNode *pNodePar = pNode->GetParent();
+        if (pNodePar != NULL)
+        {
+            double len = gridWidth * (pNodePar->GetLevel() - pNode->GetLevel());
+            // bounded formatting: "%f" of a very large value needs far more than 100 characters
+            char buf[100];
+            snprintf(buf, sizeof(buf), ":%f", len);
+            resNodeStr += buf;
+        }
+    }
+    else if (pNode->GetLength() >= 0.0)
+    {
+#if 0
+        // if length is set, add it
+        resNodeStr += ":";
+        resNodeStr += ConvToString(pNode->GetLength() );
+#endif
+    }
+
+    if (fEdgeLbel)
+    {
+        TreeNode *pParNode = pNode->GetParent();
+        if (pParNode != NULL)
+        {
+            int cIndex = pParNode->GetChildIndex(pNode);
+
+            // add edge label in the format: s1s2s3....
+            string strEdgeLbel;
+            vector<int> listEdgeLabels;
+            pParNode->GetEdgeLabelsAtBranch(cIndex, listEdgeLabels);
+
+            // each label of the branch from the parent to this node is
+            // appended as '#' followed by the number
+
+            for (int i = 0; i < (int)listEdgeLabels.size(); ++i)
+            {
+                char buf[10000];
+                snprintf(buf, sizeof(buf), "#%d", listEdgeLabels[i]);
+                strEdgeLbel += buf;
+            }
+            if (strEdgeLbel.length() > 0)
+            {
+                resNodeStr += ":";
+                resNodeStr += strEdgeLbel;
+            }
+        }
+    }
+
+    return resNodeStr;
+}
+
+// This function adds a new tree node and returns it. The new node is attached under the given parent node.
+TreeNode *PhylogenyTreeBasic ::AddTreeNode(TreeNode *parNode, int id)
+{
+    // Create a node and attach it under parNode; returns the new node.
+    //   parNode - parent to attach to; NULL makes the new node the root,
+    //             which is only allowed while the tree has no root
+    //   id      - node id; a negative value means "use the current number
+    //             of vertices in the tree"
+    const int idNode = (id < 0) ? GetNumVertices() : id;
+
+    // The id is also stored as the node's value.
+    TreeNode *pnNew = new TreeNode(idNode);
+    pnNew->AddNodeValue(idNode);
+    if (parNode != NULL)
+    {
+        // Attach using a default-constructed label sequence.
+        SEQUENCE seqNoLabels;
+        parNode->AddChild(pnNew, seqNoLabels);
+        return pnNew;
+    }
+    YW_ASSERT_INFO(rootNode == NULL, "Can not add a node with no parent if the tree is not empty");
+    rootNode = pnNew;
+    return pnNew;
+}
+
+int PhylogenyTreeBasic ::GetNumVertices() const
+{
+    // Count every node reachable from the root; an empty tree has 0.
+    int numNodes = 0;
+    stack<TreeNode *> stackPending;
+    if (rootNode != NULL)
+    {
+        stackPending.push(rootNode);
+    }
+    // Each popped node counts once and queues its children.
+    while (!stackPending.empty())
+    {
+        TreeNode *pnCur = stackPending.top();
+        stackPending.pop();
+        numNodes += 1;
+        const int numChildren = (int)pnCur->listChildren.size();
+        for (int idx = 0; idx < numChildren; ++idx)
+            stackPending.push(pnCur->listChildren[idx]);
+    }
+    return numNodes;
+}
+
+//int PhylogenyTreeBasic :: GetIdFromStr( const string &strPart, TaxaMapper *pTMapper )
+//{
+//cout << "GetIdFromStr: " << strPart << endl;
+//	string strToUse = strPart;
+//	size_t posSeparator = strPart.find( ':' );
+//	if( posSeparator != string::npos )
+//	{
+//		strToUse = strPart.substr(0, (int)posSeparator  );
+//	}
+//	// get rid of
+//	int res = -1;
+//	if( pTMapper == NULL)
+//	{
+//		sscanf( strToUse.c_str(), "%d", &res  );
+//cout << "Empty mapper\n";
+//	}
+//	else
+//	{
+//		// are we reading in the first tree or not
+//		if( pTMapper->IsInitialized() == true )
+//		{
+//			res  = pTMapper->GetId(strToUse);
+//cout << "GetIdFromStr: GetId: " << strToUse << ": " << res << endl;
+//		}
+//		else
+//		{
+//			// this is new
+//			res = pTMapper->AddTaxaString( strToUse );
+//cout << "GetIdFromStr: New id: " << strToUse << ": " << res << endl;
+//		}
+//	}
+//	return res;
+//}
+
+TreeNode *PhylogenyTreeBasic ::ConsOnNewickSubtree(const string &nwStringPart, int &leafId, int &invId, int numLeaves, bool fBottomUp, TaxaMapper *pTMapper)
+{
+    // Recursively build the subtree described by nwStringPart and return its root node.
+
+    TreeNode *pres = NULL;
+    int posLenBegin = -1; // index where a branch length starts, or -1 when there is none
+
+    // this function builds recursively subtrees for this part of string
+    // First, is this string a leaf or not
+    if (nwStringPart[0] != '(')
+    {
+        // Leaf case. Parameters used here:
+        //   leafId    - running leaf counter, used (and advanced) only when a mapper is given
+        //   numLeaves - when positive, the leaf id read from the string must be below it
+
+        // 7/27/10 YW: for now, we take this convention:
+        // tree node id = label  if no mapper is passed
+        // Why? This case is by default for internal use only
+        // while mapper is used for external (user) specified
+        // Yes, this is a leaf
+        int nodeId = TaxaMapper ::GetIdFromStr(nwStringPart, pTMapper);
+        // nodeId is the id obtained from the leaf text (through the mapper when one is given)
+
+        if (numLeaves > 0)
+        {
+            if (nodeId >= numLeaves)
+            {
+                cout << "Wrong: nodeId = " << nodeId << ", numLeaves = " << numLeaves << endl;
+            }
+            YW_ASSERT_INFO(nodeId < numLeaves, "We assume in phylogeny tree, leaf id starts from 0");
+        }
+        // choose the id of the tree node itself
+
+        int idtouse = leafId;
+        if (pTMapper == NULL)
+        {
+            // in this case take the same as node id
+            idtouse = nodeId;
+        }
+        else
+        {
+            // update leafid since we are using it
+            leafId++;
+        }
+
+        TreeNode *pLeaf = new TreeNode(idtouse);
+        // also set its label this way
+        pLeaf->AddNodeValue(idtouse);
+        // (leafId was already advanced above when it was used)
+
+        // Two labels are stored on the leaf:
+        //   - the label: the string form of nodeId
+        //   - the user label: the id part extracted from the Newick text
+        // NOTE(review): ExtractIdPartFromStr presumably drops a trailing
+        // ":length" part -- confirm in TaxaMapper.
+        string strLbl = GetStringFromId(nodeId);
+        pLeaf->SetLabel(strLbl);
+
+        string strLblUser = TaxaMapper ::ExtractIdPartFromStr(nwStringPart);
+        pLeaf->SetUserLabel(strLblUser);
+
+        // the leaf is the result of this call;
+        // a branch length, if present, is read at the end of the function
+        pres = pLeaf;
+
+        size_t posLenSep = nwStringPart.find(':');
+        if (posLenSep != string::npos)
+        {
+            // the length text starts right after the ':'
+            posLenBegin = posLenSep + 1;
+        }
+    }
+    else
+    {
+        // This is not a leaf
+        // so we create underlying level for it
+        int idToUse = 1000;
+        if (fBottomUp == false)
+        {
+            idToUse = invId++;
+        }
+        TreeNode *pInternal = new TreeNode(idToUse);
+        int lastpos = 1;
+        int curpos = 0;
+        int parnet = 0; // (: +1, ) -1
+        while (true)
+        {
+            // scan the string one character at a time
+
+            if (curpos >= (int)nwStringPart.size())
+            {
+                // we are done
+                break;
+            }
+
+            // keep balance
+            if (nwStringPart[curpos] == '(')
+            {
+                parnet++;
+            }
+            else if (nwStringPart[curpos] == ')')
+            {
+                parnet--;
+
+                // when parnet = 0, we know we end
+                if (parnet == 0)
+                {
+                    // now adding the last piece
+                    // create a new node
+                    int strl = curpos - lastpos;
+                    string subs = nwStringPart.substr(lastpos, strl);
+                    // subs is the text of the last child of this node
+                    TreeNode *pChild = ConsOnNewickSubtree(subs, leafId, invId, numLeaves, fBottomUp, pTMapper);
+
+                    // also append it as child
+                    vector<int> empytLabels;
+                    pInternal->AddChild(pChild, empytLabels);
+
+                    // also update lastpos
+                    lastpos = curpos + 1;
+                }
+            }
+            else if (nwStringPart[curpos] == ',')
+            {
+                // Yes, this is a separator, but we only start to process it when the
+                // balance of parentheses is right
+                if (parnet == 1)
+                {
+                    // create a new node
+                    int strl = curpos - lastpos;
+                    string subs = nwStringPart.substr(lastpos, strl);
+                    // subs is the text of one child of this node
+                    TreeNode *pChild = ConsOnNewickSubtree(subs, leafId, invId, numLeaves, fBottomUp, pTMapper);
+
+                    // also append it as child
+                    vector<int> empytLabels;
+                    pInternal->AddChild(pChild, empytLabels);
+
+                    // also update lastpos
+                    lastpos = curpos + 1;
+                }
+            }
+            else if (nwStringPart[curpos] == ':')
+            {
+                // keep track of length
+                if (parnet == 0)
+                {
+                    posLenBegin = curpos + 1;
+                }
+            }
+
+            // now move to next pos
+            curpos++;
+        }
+
+        // if we go bottom up labeling the node, we should re-label the node here
+        if (fBottomUp == true)
+        {
+            pInternal->SetID(invId++);
+        }
+        // the internal node is the result of this call
+        pres = pInternal;
+    }
+
+    // read the branch length, if one was found
+    if (posLenBegin >= 0)
+    {
+        // the length text ends just before the next ')' or at the end of the string
+        size_t posRightExt = nwStringPart.find(')', posLenBegin);
+        int rightPos = (int)nwStringPart.size() - 1;
+        if (posRightExt != string::npos)
+        {
+            rightPos = posRightExt - 1;
+        }
+        string subs = nwStringPart.substr(posLenBegin, rightPos - posLenBegin + 1);
+        double len = StrToDouble(subs);
+        pres->SetLength(len);
+    }
+    return pres;
+}
+
+TreeNode *PhylogenyTreeBasic ::ConsOnNewickSubtreeDupLabels(const string &nwStringPart, int &invId, int &leafId, TaxaMapper *pTMapper)
+{
+    // Recursively build the subtree described by nwStringPart and return its root node.
+
+    // this function builds recursively subtrees for this part of string
+    // First, is this string a leaf or not
+    if (nwStringPart[0] != '(')
+    {
+        // Leaf case: the node id comes from the running leaf counter, so two
+        // leaves with the same label text still get different node ids.
+
+        // Yes, this is a leaf
+        int nodeId = leafId;
+        leafId++;
+        int leafLabel = TaxaMapper ::GetIdFromStr(nwStringPart, pTMapper);
+        // leafLabel is the id obtained from the leaf text (through the mapper when one is given)
+
+        // the counter value is both the node id and the node value
+        TreeNode *pLeaf = new TreeNode(nodeId);
+        // also set its label this way
+        pLeaf->AddNodeValue(nodeId);
+
+        // Two labels are stored on the leaf:
+        //   - the label: leafLabel printed as a decimal number
+        //   - the user label: the id part extracted from the Newick text
+        // NOTE(review): ExtractIdPartFromStr presumably drops a trailing
+        // ":length" part -- confirm in TaxaMapper.
+        char buf[1000];
+        sprintf(buf, "%d", leafLabel);
+        string strLabel = buf;
+        pLeaf->SetLabel(strLabel);
+
+        string strLabelUser = TaxaMapper ::ExtractIdPartFromStr(nwStringPart);
+        pLeaf->SetUserLabel(strLabelUser);
+
+        // the leaf is the result of this call
+        return pLeaf;
+    }
+    else
+    {
+
+        // This is not a leaf
+        // so we create underlying level for it
+        int idToUse = invId;
+        TreeNode *pInternal = new TreeNode(idToUse);
+        int lastpos = 1;
+        int curpos = 0;
+        int parnet = 0; // (: +1, ) -1
+        while (true)
+        {
+            // scan the string one character at a time
+
+            if (curpos >= (int)nwStringPart.size())
+            {
+                // we are done
+                break;
+            }
+
+            // keep balance
+            if (nwStringPart[curpos] == '(')
+            {
+                parnet++;
+            }
+            else if (nwStringPart[curpos] == ')')
+            {
+                parnet--;
+
+                // when parnet = 0, we know we end
+                if (parnet == 0)
+                {
+                    // now adding the last piece
+                    // create a new node
+                    int strl = curpos - lastpos;
+                    string subs = nwStringPart.substr(lastpos, strl);
+                    // subs is the text of the last child of this node
+                    TreeNode *pChild = ConsOnNewickSubtreeDupLabels(subs, invId, leafId, pTMapper);
+
+                    // also append it as child
+                    vector<int> empytLabels;
+                    pInternal->AddChild(pChild, empytLabels);
+
+                    // also update lastpos
+                    lastpos = curpos + 1;
+                }
+            }
+            else if (nwStringPart[curpos] == ',')
+            {
+                // Yes, this is a separator, but we only start to process it when the
+                // balance of parentheses is right
+                if (parnet == 1)
+                {
+                    // create a new node
+                    int strl = curpos - lastpos;
+                    string subs = nwStringPart.substr(lastpos, strl);
+                    // subs is the text of one child of this node
+                    TreeNode *pChild = ConsOnNewickSubtreeDupLabels(subs, invId, leafId, pTMapper);
+
+                    // also append it as child
+                    vector<int> empytLabels;
+                    pInternal->AddChild(pChild, empytLabels);
+
+                    // also update lastpos
+                    lastpos = curpos + 1;
+                }
+            }
+
+            // now move to next pos
+            curpos++;
+        }
+
+        // The internal node is re-labelled only after all of its children
+        // have been built, so it takes the next free internal id at that
+        // point (children that are internal nodes have already taken
+        // theirs). The id given at construction above is therefore only
+        // a placeholder.
+
+        pInternal->SetID(invId++);
+        // the internal node is the result of this call
+        return pInternal;
+    }
+}
+
+// Get nodes info: fills nodeIds/parPos with a parent-position encoding of this tree
+// 7/27/10: we want to get node label (NOT id!) -- leaf slots of nodeIds end up holding the leaf's int label
+void PhylogenyTreeBasic ::GetNodeParInfo(vector<int> &nodeIds, vector<int> &parPos)
+{
+    // Slot layout: a leaf uses its own id as slot (asserted to lie in [0, numLeaves)); internal nodes get
+    // slots numLeaves, numLeaves+1, ... in the order the iterator visits them. parPos[slot] is the parent's slot.
+    // mapNodeIds remembers the slot given to each internal node so that its parent can look it up later
+    map<TreeNode *, int> mapNodeIds;
+
+    // defaults: nodeIds[i] = i and parPos[i] = -1 (a node that is nobody's child keeps -1)
+    int numTotVerts = GetNumVertices();
+    nodeIds.resize(numTotVerts);
+    for (int i = 0; i < numTotVerts; ++i)
+    {
+        nodeIds[i] = i;
+    }
+    parPos.resize(numTotVerts);
+    for (int i = 0; i < numTotVerts; ++i)
+    {
+        parPos[i] = -1;
+    }
+
+    // IMPORTANT: assume binary tree, otherwise all bets are off!!!!
+    //int numLeaves = ( numTotVerts+1 )/2;
+    int numLeaves = GetNumLeaves();
+    // NOTE(review): numLeaves + (number of internal nodes) must not exceed numTotVerts, or the writes below overrun -- confirm
+    // do traversal
+    int curNodeNum = 0;
+    // NOTE(review): a child must be visited before its parent (see the lookup assert below); presumably a postorder walk
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        //TreeNode *pn = NextPostorderWalk( ) ;
+        if (pn == NULL)
+        {
+            //cout << "No node here. Stop.\n";
+            break; // done with all nodes
+        }
+
+        // leaves get no slot of their own here; they are recorded through their parent below
+        if (pn->IsLeaf() == true)
+        {
+            // skip it for now
+            continue;
+        }
+
+        // next free internal slot
+        int nonleafInd = numLeaves + curNodeNum;
+        curNodeNum++;
+        // remember it
+        mapNodeIds.insert(map<TreeNode *, int>::value_type(pn, nonleafInd));
+        // now point each child of this node at this slot, whether the child is a leaf or not
+        // internal child: look up the slot stored earlier. Leaf child: its slot is simply its id
+        for (int jj = 0; jj < pn->GetChildrenNum(); ++jj)
+        {
+            TreeNode *pnjj = pn->GetChild(jj);
+            int pnjjid;
+            int pnjjlabel = -1;
+            if (pnjj->IsLeaf() == true)
+            {
+                pnjjid = pnjj->GetID();
+                // assume id is distinct, while label can be duplicate
+                pnjjlabel = pnjj->GetIntLabel();
+                //cout << "pnjjid = " << pnjjid << ", pnjjlabel: " << pnjjlabel << ", numLeaves: " << numLeaves << endl;
+                YW_ASSERT_INFO(pnjjid >= 0 && pnjjid < numLeaves, "Leaf id: out of range");
+            }
+            else
+            {
+                YW_ASSERT_INFO(mapNodeIds.find(pnjj) != mapNodeIds.end(), "Fail to find the node");
+                pnjjid = mapNodeIds[pnjj];
+            }
+            parPos[pnjjid] = nonleafInd;
+            // only a leaf child can have pnjjlabel >= 0: its slot then reports the int label instead of the id
+            // this is needed when there are duplicate labels in the tree
+            if (pnjjlabel >= 0)
+            {
+                nodeIds[pnjjid] = pnjjlabel;
+            }
+        }
+    }
+
+    // print out
+    //cout << "original tree:  ";
+    //string strTree;
+    //ConsNewick(strTree);
+    //cout << strTree << endl;
+    //cout << "Parent position : ";
+    //DumpIntVec( parPos );
+}
+
+void PhylogenyTreeBasic ::GetNodeParInfoNew(vector<int> &nodeIds, vector<int> &parPos)
+{
+    // Record, for every node in iterator visiting order, its id (appended to nodeIds) and the id of
+    // its parent (appended to parPos; -1 for a node without parent).
+    // the previous version has various of problems, but it is being used by some programs
+    // so I decide to add a new function
+    // Note this one assume all nodes are labeled consecutively
+    // (ids below numTotVerts, with the leaves taking ids below the number of leaves); unlike
+    // GetNodeParInfo, parPos holds parent IDS here, not slots computed by this function.
+    //map<TreeNode *,int> mapNodeIds;
+
+    // id is simply consecutive
+    int numTotVerts = GetNumVertices();
+    // Start from empty outputs. Records are appended with push_back below, so
+    // anything the caller left in these vectors used to stay in front of this
+    // tree's records and shift every position. GetNodeParInfo (resize + fill)
+    // already overwrites its outputs, so resetting here makes the two agree.
+    nodeIds.clear();
+    parPos.clear();
+    // No pre-sizing is needed (contrast GetNodeParInfo):
+    // each visited node contributes exactly one entry to
+    // nodeIds and one entry to parPos, both in visiting
+    // order.
+
+    // IMPORTANT: assume binary tree, otherwise all bets are off!!!!
+    //int numLeaves = ( numTotVerts+1 )/2;
+    int numLeaves = GetNumLeaves();
+    //cout << "Numleaves = " << numLeaves << endl;
+    // do traversal
+    //int curNodeNum = 0;
+    //InitPostorderWalk();
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        //TreeNode *pn = NextPostorderWalk( ) ;
+        if (pn == NULL)
+        {
+            //cout << "No node here. Stop.\n";
+            break; // done with all nodes
+        }
+
+        // sanity checks on the id numbering this function relies on
+        int curNodeId = pn->GetID();
+        //cout << "curNodeId: " << curNodeId << endl;
+        YW_ASSERT_INFO(curNodeId < numTotVerts, "curNodeId exceeds limit (the node ids must be consecutive from 0)");
+        if (pn->IsLeaf() == true)
+        {
+            // leaves are expected to occupy the lowest ids
+            YW_ASSERT_INFO(curNodeId < numLeaves, "The tree violates assumption that tree leaf id start from 0");
+        }
+
+        // add a record
+        nodeIds.push_back(pn->GetID());
+        TreeNode *pnPar = pn->GetParent();
+        if (pnPar == NULL)
+        {
+            parPos.push_back(-1);
+        }
+        else
+        {
+            // simply its id
+            parPos.push_back(pnPar->GetID());
+        }
+
+        //	continue;
+        //}
+#if 0
+		//
+		//int nonleafInd = numLeaves + curNodeNum;
+		int nonleafInd = curNodeId;
+		//curNodeNum++;
+		// remember it
+		mapNodeIds.insert( map<TreeNode *,int> :: value_type( pn, curNodeId ) );
+		// now set its descendents to this index, either leaf or non-leaf
+		// if it is non-leaf, do a lookup of the stored id. Leaf: just go by its id
+		for(int jj=0; jj<pn->GetChildrenNum(); ++jj)
+		{
+			TreeNode *pnjj = pn->GetChild(jj);
+			int pnjjid;
+			if( pnjj->IsLeaf() == true )
+			{
+				pnjjid = pnjj->GetID();
+				YW_ASSERT_INFO( pnjjid >=0 && pnjjid < numLeaves, "Leaf id: out of range" );
+			}
+			else
+			{
+				YW_ASSERT_INFO( mapNodeIds.find( pnjj ) != mapNodeIds.end(), "Fail to find the node"  );
+				pnjjid = mapNodeIds[pnjj];
+			}
+			parPos[pnjjid] = nonleafInd;
+#endif
+        //}
+    }
+
+    // print out
+    //cout << "original tree:  ";
+    //string strTree;
+    //ConsNewick(strTree);
+    //cout << strTree << endl;
+    //cout << "Parent position : ";
+    //DumpIntVec( parPos );
+}
+
+//
+bool PhylogenyTreeBasic ::ConsOnParPosList(const vector<int> &parPos, int numLeaves, bool fBottupUpLabel)
+{
+    // Build this tree from a parent-position list by going through its Newick text.
+    string newickText;
+    const bool fConverted = ConvParPosToNewick(parPos, newickText);
+    if (!fConverted)
+    {
+        return false; // conversion failed: ConsOnNewick is not called
+    }
+    ConsOnNewick(newickText, numLeaves, fBottupUpLabel);
+    return true;
+}
+
+bool PhylogenyTreeBasic ::ConvParPosToNewick(const vector<int> &parPos, string &strNewick)
+{
+    // Convert a parent-position list (last entry = root, whose parent is -1) to Newick; false if the list is empty.
+    if (parPos.empty()) return false; // guard: parPos[size() - 1] below would read out of bounds
+    YW_ASSERT_INFO(parPos[parPos.size() - 1] == -1, "Must be -1 for the last value in parPos");
+    ConvParPosToNewickSubtree((int)parPos.size() - 1, parPos, strNewick);
+    return true;
+}
+
+void PhylogenyTreeBasic ::ConvParPosToNewickSubtree(int nodeInd, const vector<int> &parPos, string &strNewick)
+{
+    // Writes to strNewick the Newick text of the subtree rooted at nodeInd, where parPos[i] is the parent of i.
+    // First collect the indices whose parent entry is nodeInd, in increasing index order.
+    vector<int> childInds;
+    const int numEntries = (int)parPos.size();
+    for (int pos = 0; pos < numEntries; ++pos)
+    {
+        if (nodeInd == parPos[pos])
+        {
+            childInds.push_back(pos);
+        }
+    }
+    if (childInds.empty())
+    {
+        // no children: a leaf is written as its own index
+        char numText[100];
+        sprintf(numText, "%d", nodeInd);
+        strNewick = numText;
+        return;
+    }
+    YW_ASSERT_INFO(childInds.size() == 2, "Only binary trees are supported for now");
+
+    // internal node: "(" + first child's subtree + "," + second child's subtree + ")"
+    string leftPart, rightPart;
+    ConvParPosToNewickSubtree(childInds[0], parPos, leftPart);
+    ConvParPosToNewickSubtree(childInds[1], parPos, rightPart);
+    string merged = "(" + leftPart;
+    merged += "," + rightPart + ")";
+    strNewick = merged;
+}
+
+void PhylogenyTreeBasic ::GetLeaveIds(set<int> &lvids)
+{
+    // Collect into lvids (reset first) the id of every leaf node reached by the tree iterator.
+    lvids.clear();
+    PhylogenyTreeIterator walker(*this);
+    walker.Init();
+    while (!walker.IsDone())
+    {
+        TreeNode *pCur = walker.GetCurrNode();
+        walker.Next();
+        if (pCur == NULL)
+        {
+            break; // a NULL node ends the walk
+        }
+        if (pCur->IsLeaf())
+        {
+            lvids.insert(pCur->GetID());
+        }
+    }
+}
+void PhylogenyTreeBasic ::GetLeafIntLabels(set<int> &setIntLabels)
+{
+    setIntLabels.clear(); // the result replaces, rather than extends, the caller's set
+    vector<TreeNode *> leaves;
+    GetAllLeafNodes(leaves);
+    for (size_t k = 0; k < leaves.size(); ++k) // integer label of every leaf; duplicates collapse in the set
+    {
+        setIntLabels.insert(leaves[k]->GetIntLabel());
+    }
+}
+
+void PhylogenyTreeBasic::GetLeavesIdsWithLabel(const string &label, set<int> &lvids)
+{
+    // Reset lvids, then store the id of every visited node whose label equals `label`.
+    // Note: the node is not tested with IsLeaf(); the match is on the label text alone.
+    lvids.clear();
+    PhylogenyTreeIterator walker(*this);
+    for (walker.Init(); !walker.IsDone();)
+    {
+        // fetch the current node, then advance before inspecting it
+        TreeNode *pCur = walker.GetCurrNode();
+        walker.Next();
+        if (pCur == NULL)
+        {
+            break; // done with all nodes
+        }
+        if (label == pCur->GetLabel())
+        {
+            lvids.insert(pCur->GetID());
+        }
+    }
+}
+
+void PhylogenyTreeBasic ::GetLeavesWithLabels(const set<string> &setLabels, set<TreeNode *> &setLvNodes)
+{
+    // Reset setLvNodes, then collect every visited node whose label is a member of setLabels.
+    // Note: nodes are matched by label only; no IsLeaf() test is applied.
+    setLvNodes.clear();
+    PhylogenyTreeIterator walker(*this);
+    for (walker.Init(); !walker.IsDone();)
+    {
+        // fetch the current node, then advance before inspecting it
+        TreeNode *pCur = walker.GetCurrNode();
+        walker.Next();
+        if (pCur == NULL)
+        {
+            break; // done with all nodes
+        }
+        const bool fWanted = setLabels.count(pCur->GetLabel()) > 0;
+        if (fWanted)
+        {
+            setLvNodes.insert(pCur);
+        }
+    }
+}
+
+void PhylogenyTreeBasic ::UpdateIntLabel(const vector<int> &listLabels)
+{
+    // Relabel every node: the node with id k gets the decimal text of listLabels[k] as its label.
+    // Node ids must therefore lie in [0, listLabels.size()).
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        if (pn == NULL)
+        {
+            break; // done with all nodes
+        }
+        // A negative id used to slip past the upper-bound-only check and index listLabels out of range.
+        const int nodeId = pn->GetID();
+        YW_ASSERT_INFO(nodeId >= 0 && nodeId < (int)listLabels.size(), "Tree id: over limit");
+        char strbuf[100];
+        sprintf(strbuf, "%d", listLabels[nodeId]);
+        string lblNew = strbuf;
+        pn->SetLabel(lblNew);
+    }
+}
+
+void PhylogenyTreeBasic ::Reroot(TreeNode *pRootDesc)
+{
+    YW_ASSERT_INFO(pRootDesc != NULL, "Can not take NULL pointer");
+    // Re-root so that pRootDesc hangs directly under a fresh root node; if it is already the root, nothing to be done
+    if (pRootDesc == rootNode)
+    {
+        return;
+    }
+    // The fresh root reuses the old root's id. pRootDesc is cut from its former parent
+    // (pRootOtherDesc) and attached under the fresh root with the labels read from that
+    // former edge.
+    TreeNode *pRootNew = new TreeNode(rootNode->GetID());
+    TreeNode *pRootOtherDesc = pRootDesc->GetParent();
+    YW_ASSERT_INFO(pRootOtherDesc != NULL, "TBD");
+    vector<int> lblsNew;
+    // for now, concentrate the labels without SPLITTING
+    pRootOtherDesc->GetEdgeLabelsToChild(pRootDesc, lblsNew);
+    pRootOtherDesc->RemoveChild(pRootDesc);
+    pRootNew->AddChild(pRootDesc, lblsNew);
+    // Now walk from pRootOtherDesc up to the old root: each node on that path is re-attached
+    // as a child of the node that used to be below it (pCurNodePar)
+    TreeNode *pCurNode = pRootOtherDesc;
+    TreeNode *pCurNodePar = pRootNew;
+    while (true)
+    {
+        // setup the ancestral relationship (pCurNode becomes a child of pCurNodePar)
+        YW_ASSERT_INFO(pCurNode != NULL && pCurNodePar != NULL, "Something wrong");
+        //cout << "BEFORE CHANGING...\n";
+        //cout << "pCurNode: label =" << pCurNode->GetLabel() << ", ID = " << pCurNode->GetID() << ", num of children " << pCurNode->GetChildrenNum() << endl;
+        //for( int pp=0; pp< pCurNode->GetChildrenNum(); ++pp )
+        //{
+        //cout << "** Child: " << pCurNode->GetChild(pp)->GetID() << endl;
+        //}
+        //cout << "pCurNodePar: label =" << pCurNodePar->GetLabel() << ", ID = " << pCurNodePar->GetID()  << ", num of children " << pCurNodePar->GetChildrenNum()  << endl;
+        //for( int pp=0; pp< pCurNodePar->GetChildrenNum(); ++pp )
+        //{
+        //cout << "** Child: " << pCurNodePar->GetChild(pp)->GetID() << endl;
+        //}
+        vector<int> lblsNew;
+        pCurNode->GetEdgeLabelsToChild(pCurNodePar, lblsNew);
+        TreeNode *pNodeNext = pCurNode->GetParent();
+        pCurNode->RemoveChild(pCurNodePar);
+        // NOTE(review): on the first pass pCurNodePar is pRootNew, never a child of pCurNode -- confirm the two calls above tolerate a non-child
+        pCurNodePar->AddChild(pCurNode, lblsNew);
+
 #if 0
+		vector<TreeNode *> listParChildren;
+		for(int c=0; c<(int)pCurNode->GetChildrenNum(); ++c  )
+		{
+			//if( pCurNode->GetChild(c) != pCurNode )
+			//{
+			listParChildren.push_back( pCurNode->GetChild(c) ) ;
+			//}
+		}
+		for(int c=0; c<(int)listParChildren.size(); ++c  )
+		{
+			//if( pCurNode->GetChild(c) != pCurNode )
+			//{
+			pCurNode->RemoveChild( listParChildren[c] ) ;
+			//}
+		}
+		// add these to the descendent of the new par
+		for( int c=0; c<(int)listParChildren.size(); ++c )
+		{
+			vector<int> emptyLbls;
+			pCurNodePar->AddChild(listParChildren[c], emptyLbls);
+		}
+#endif
+
+        //cout << "AFTER CHANGING...\n";
+        //cout << "pCurNode: label =" << pCurNode->GetLabel() << ", ID = " << pCurNode->GetID() << ", num of children " << pCurNode->GetChildrenNum() << endl;
+        //for( int pp=0; pp< pCurNode->GetChildrenNum(); ++pp )
+        //{
+        //cout << "** Child: " << pCurNode->GetChild(pp)->GetID() << endl;
+        //}
+        //cout << "pCurNodePar: label =" << pCurNodePar->GetLabel() << ", ID = " << pCurNodePar->GetID()  << ", num of children " << pCurNodePar->GetChildrenNum()  << endl;
+        //for( int pp=0; pp< pCurNodePar->GetChildrenNum(); ++pp )
+        //{
+        //cout << "** Child: " << pCurNodePar->GetChild(pp)->GetID() << endl;
+        //}
+
+        // pNodeNext was read before the re-attachment; NULL means pCurNode had no parent, i.e. the walk has reached the top
+        if (pNodeNext == NULL)
+        {
+            vector<TreeNode *> listParChildren;
+            for (int c = 0; c < (int)pCurNode->GetChildrenNum(); ++c)
+            {
+                // snapshot the remaining children first: the child list is
+                // modified by the RemoveChild calls in the next loop
+                listParChildren.push_back(pCurNode->GetChild(c));
+                //}
+            }
+            for (int c = 0; c < (int)listParChildren.size(); ++c)
+            {
+                //if( pCurNode->GetChild(c) != pCurNode )
+                //{
+                pCurNode->RemoveChild(listParChildren[c]);
+                //}
+            }
+            // add these to the descendent of the new par
+            for (int c = 0; c < (int)listParChildren.size(); ++c)
+            {
+                vector<int> lblsNew;
+                pCurNode->GetEdgeLabelsToChild(listParChildren[c], lblsNew);
+
+                // NOTE(review): the child was already removed from pCurNode above, so the labels fetched here may be empty -- confirm
+                pCurNodePar->AddChild(listParChildren[c], lblsNew);
+            }
+            pCurNodePar->RemoveChild(pCurNode);
+
+            //cout << "FINALLY...\n";
+            //cout << "pCurNode: label =" << pCurNode->GetLabel() << ", ID = " << pCurNode->GetID() << ", num of children " << pCurNode->GetChildrenNum() << endl;
+            //for( int pp=0; pp< pCurNode->GetChildrenNum(); ++pp )
+            //{
+            //cout << "** Child: " << pCurNode->GetChild(pp)->GetID() << endl;
+            //}
+            //cout << "pCurNodePar: label =" << pCurNodePar->GetLabel() << ", ID = " << pCurNodePar->GetID()  << ", num of children " << pCurNodePar->GetChildrenNum()  << endl;
+            //for( int pp=0; pp< pCurNodePar->GetChildrenNum(); ++pp )
+            //{
+            //cout << "** Child: " << pCurNodePar->GetChild(pp)->GetID() << endl;
+            //}
+            // done. pCurNode is the old root: it has been by-passed, its remaining
+            // children now hang under pCurNodePar and it was unlinked again just above
+            break;
+        }
+        // climb one level: the node just re-attached is the new parent for the next node up
+        pCurNodePar = pCurNode;
+        pCurNode = pNodeNext;
+    }
+
+    // finally get rid of the original root and install the fresh one
+    delete rootNode;
+    rootNode = pRootNew;
+}
+
+int PhylogenyTreeBasic ::GetNumLeaves()
+{
+    // Cached leaf count: the number of distinct leaf ids, recomputed only while the stored value is not positive.
+    if (numLeaves <= 0)
+    {
+        set<int> leafIdSet;
+        GetLeaveIds(leafIdSet);
+        numLeaves = leafIdSet.size();
+    }
+    return numLeaves;
+}
+
+int PhylogenyTreeBasic ::GetNumInternalNodes()
+{
+    // Count the nodes of the tree that are not leaves.
+    vector<TreeNode *> nodes;
+    GetAllNodes(nodes);
+    int numInternal = 0;
+    for (vector<TreeNode *>::const_iterator it = nodes.begin(); it != nodes.end(); ++it)
+    {
+        // every non-leaf node contributes one
+        if (!(*it)->IsLeaf())
+        {
+            numInternal += 1;
+        }
+    }
+    return numInternal;
+}
+
+void PhylogenyTreeBasic ::GetAllLeafNodes(vector<TreeNode *> &listLeafNodes) const
+{
+    // Replace listLeafNodes with pointers to all leaf nodes, in iterator visiting order.
+    listLeafNodes.clear();
+    PhylogenyTreeBasic &self = const_cast<PhylogenyTreeBasic &>(*this); // iterator is built from a non-const tree
+    PhylogenyTreeIterator walker(self);
+    for (walker.Init(); !walker.IsDone();)
+    {
+        TreeNode *pCur = walker.GetCurrNode();
+        walker.Next();
+        if (pCur == NULL)
+        {
+            break; // done with all nodes
+        }
+        if (pCur->IsLeaf())
+        {
+            listLeafNodes.push_back(pCur);
+        }
+    }
+}
+
+void PhylogenyTreeBasic ::GetAllNodes(vector<TreeNode *> &listLeafNodes) const
+{
+    // Replace listLeafNodes with pointers to ALL visited nodes, leaves and internal alike
+    // (despite the parameter's name), in iterator visiting order.
+    listLeafNodes.clear();
+    PhylogenyTreeBasic &self = const_cast<PhylogenyTreeBasic &>(*this); // iterator is built from a non-const tree
+    PhylogenyTreeIterator walker(self);
+    for (walker.Init(); !walker.IsDone();)
+    {
+        TreeNode *pCur = walker.GetCurrNode();
+        walker.Next();
+        if (pCur == NULL)
+        {
+            break; // done with all nodes
+        }
+        listLeafNodes.push_back(pCur);
+    }
+}
+
+// remove all leaf nodes whose label is empty or "-", plus every node left without a labeled leaf below it
+void PhylogenyTreeBasic ::CleanNonLabeledLeaves()
+{
+    // Phase 1: collect the nodes to KEEP, i.e.
+    // mark all nodes that are on the path from a labeled leaf node to root
+    set<TreeNode *> setNodesNonredundent;
+
+    vector<TreeNode *> listLeafNodes;
+    GetAllLeafNodes(listLeafNodes);
+    for (int ii = 0; ii < (int)listLeafNodes.size(); ++ii)
+    {
+        //cout << "Leaflabel: " << listLeafNodes[ii]->GetLabel() << endl;
+        if (listLeafNodes[ii]->GetLabel().empty() == true || listLeafNodes[ii]->GetLabel() == "-")
+        {
+            // an unlabeled leaf marks nothing
+            //cout << "This leaf is REDUNDENT\n";
+            continue;
+        }
+
+        TreeNode *pncurr = listLeafNodes[ii];
+        while (pncurr != NULL && setNodesNonredundent.find(pncurr) == setNodesNonredundent.end())
+        {
+
+            // climb until a node that is already marked (its ancestors were marked then too) or past the top
+            setNodesNonredundent.insert(pncurr);
+
+            //
+            pncurr = pncurr->GetParent();
+        }
+    }
+
+    // Phase 2: gather every node that was not marked (collected first, removed after the walk)
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    vector<TreeNode *> listNodesToClean;
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        if (pn == NULL)
+        {
+            break; // done with all nodes
+        }
+        //cout << "node id = " << pn->GetID() << endl;
+
+        // NOTE(review): if no leaf is labeled, the root is collected too and rootNode is not reset here -- confirm callers
+        if (setNodesNonredundent.find(pn) == setNodesNonredundent.end())
+        {
+            // remove it
+            listNodesToClean.push_back(pn);
+        }
+    }
+    // now clean; RemoveNode asserts the node is childless, so this relies on children being listed before parents
+    for (int ii = 0; ii < (int)listNodesToClean.size(); ++ii)
+    {
+        // NOTE(review): the numLeaves value cached by GetNumLeaves() is not refreshed after these removals
+        RemoveNode(listNodesToClean[ii]);
+    }
+}
+
+void PhylogenyTreeBasic ::RemoveNode(TreeNode *pn)
+{
+    // Unlink a childless node from its parent (when it has one) and free it; a node that still has children is rejected.
+    YW_ASSERT_INFO(pn->IsLeaf() == true, "Wrong: it still have children");
+    TreeNode *const pOwner = pn->GetParent();
+    if (NULL != pOwner)
+    {
+        pOwner->RemoveChild(pn);
+    }
+    delete pn;
+}
+
+void PhylogenyTreeBasic ::RemoveNodeKeepChildren(TreeNode *pn)
+{
+    YW_ASSERT_INFO(pn != NULL, "null");
+    // Splice pn out of the tree: its children are re-attached to pn's parent with EMPTY edge
+    // label lists, then pn itself is freed.
+
+    // remove node (and move all its children to be the nodes of the grand par)
+    // YW: cannot remove the root this way
+    YW_ASSERT_INFO(pn != GetRoot(), "Cannot remove root this way");
+    TreeNode *pnpar = pn->GetParent();
+    YW_ASSERT_INFO(pnpar != NULL, "Wrong3");
+    pnpar->RemoveChild(pn);
+
+    for (int i = 0; i < pn->GetChildrenNum(); ++i)
+    {
+        vector<int> emptyLbls;
+        pnpar->AddChild(pn->GetChild(i), emptyLbls);
+    }
+    pn->DetachAllChildren();
+    delete pn;
+
+    // the parent may now have a single child; NOTE(review): if it has none, the callee's ">= 1 children" assert fires
+    RemoveDegreeOneNodeAt(pnpar);
+}
+void PhylogenyTreeBasic ::RemoveDegreeOneNodeAt(TreeNode *pn)
+{
+    // If pn has exactly one child, remove pn from the tree and keep that child:
+    //  - pn is the root: the child is detached, pn is deleted and the child becomes the root;
+    //  - otherwise: pn is spliced out through RemoveNodeKeepChildren (which calls back
+    //    into this function for pn's former parent).
+    // A node with two or more children is left untouched; a childless node is rejected by the assert.
+    // NOTE: pn may have been deleted when this function returns.
+    // remove this node if it is a degree-1 node
+    int numChildren = pn->GetChildrenNum();
+    YW_ASSERT_INFO(numChildren >= 1, "Num of children: at least 1");
+    if (numChildren == 1)
+    {
+        // if root, then delete it and re-set the root
+        if (pn == GetRoot())
+        {
+            //cout << "The degree one node is root!\n";
+            TreeNode *pnchild = pn->GetChild(0);
+            YW_ASSERT_INFO(pnchild != NULL, "pnchild: null");
+            // unlink the child before deleting pn
+            // NOTE(review): presumably so that deleting pn cannot take the child with it -- confirm
+            pnchild->DetachSelf();
+            //cout << "After detach: root: ";
+            //pn->Dump();
+            //pn->DetachAllChildren();
+            pnchild->SetParent(NULL);
+            delete pn;
+            SetRootPlain(pnchild);
+        }
         else
         {
-            // we take the nonde value here
+            // non-root: splice pn out, re-attaching its single child to pn's parent
+            RemoveNodeKeepChildren(pn);
+        }
+    }
+    //cout << "Done: RemoveDegreeOneNodeAt. Tree is now: ";
+    //this->Dump();
+}
+
+void PhylogenyTreeBasic ::RemoveDegreeOneNodes()
+{
+    // Apply RemoveDegreeOneNodeAt to every non-leaf node of a snapshot of the tree's nodes taken up front.
+    vector<TreeNode *> listNodesAll;
+    this->GetAllNodes(listNodesAll);
+    for (int i = 0; i < (int)listNodesAll.size(); ++i)
+    {
+        if (listNodesAll[i]->IsLeaf() == false) // NOTE(review): a later entry may already have been deleted by an earlier call (it also prunes the parent) -- confirm visiting order makes this safe
+        {
+            RemoveDegreeOneNodeAt(listNodesAll[i]);
+        }
+    }
+}
+
+void PhylogenyTreeBasic ::RemoveDescendentsFrom(set<TreeNode *> &setTreeNodes)
+{
+    // Shrink setTreeNodes to its topmost members: a node is dropped when any of its
+    // proper ancestors is also in the set.
+    set<TreeNode *> topmost;
+    for (set<TreeNode *>::const_iterator itNode = setTreeNodes.begin(); itNode != setTreeNodes.end(); ++itNode)
+    {
+        // climb towards the root until an ancestor in the set is met or the top is passed
+        TreeNode *pAnc = (*itNode)->GetParent();
+        while (pAnc != NULL)
+        {
+            if (setTreeNodes.count(pAnc) > 0)
+            {
+                break; // an ancestor is in the set: this node is dropped
+            }
+            pAnc = pAnc->GetParent();
+        }
+        if (pAnc == NULL)
+        {
+            // ran past the top without meeting a member of the set
+            topmost.insert(*itNode);
+        }
+    }
+    setTreeNodes = topmost;
+}
+
+// given a set of clusters (subsets of tree taxa), construct the corresponding phylo trees
+// YW: need to allow mulfurcating trees
+void PhylogenyTreeBasic ::ConsPhyTreeFromClusters(const set<set<int>> &setClusters)
+{
+    //cout << "ConsPhyTreeFromClusters :: Cluseters: \n";
+    //for( set< set<int> > :: const_iterator it = setClusters.begin(); it != setClusters.end(); ++it )
+    //{
+    //DumpIntSet( *it );
+    //}
+    // assume all leaves are given as singleton taxon. So first collect those singleton subsets
+    set<set<int>> setSubsetsActive;
+    TreeNode *nodeLast = NULL;
+    map<set<int>, TreeNode *> mapClusterToNode;
+    for (set<set<int>>::const_iterator it = setClusters.begin(); it != setClusters.end(); ++it)
+    {
+        if (it->size() == 1)
+        {
+            // add in setClusters
+            setSubsetsActive.insert(*it);
+            // also create nodes
+            TreeNode *pnode = new TreeNode(*(it->begin()));
             char buf[100];
-            if( pn->nodeValues.size() > 0 )
+            sprintf(buf, "%d", *(it->begin()));
+            string sbuf = buf;
+            pnode->SetLabel(sbuf);
+            nodeLast = pnode;
+            mapClusterToNode.insert(map<set<int>, TreeNode *>::value_type(*it, pnode));
+        }
+    }
+    // setup num of leaves now
+    this->numLeaves = mapClusterToNode.size();
+
+    // need to allow mulfurcating trees
+    // approach: for each cluster, maintain a pointer that points to the cluster that is its parent
+    // then, each time, loop through to find all parents
+    map<set<int>, set<int>> mapClustrToPar;
+    // try to see whether we can create new nodes
+    for (set<set<int>>::iterator it1 = setClusters.begin(); it1 != setClusters.end(); ++it1)
+    {
+        set<set<int>>::iterator it2 = setClusters.begin();
+        ++it2;
+        for (; it2 != setClusters.end(); ++it2)
+        {
+            //
+            set<int> sLarger = *it1;
+            set<int> sSmaller = *it2;
+            if (sLarger.size() < sSmaller.size())
             {
-                sprintf(buf, "(%d)", pn->nodeValues[0] );        // CAUTION, here we assume each leaf has exactly 1 label
-                nameToUse = buf;
+                sLarger = *it2;
+                sSmaller = *it1;
             }
-            else
+            // can these two coalesce into a single cluster known
+            if (sLarger.size() > sSmaller.size() && IsSetContainer(sLarger, sSmaller) == true)
             {
-                // if no nodes value is set, still use label
-         //       nameToUse = pn->GetLabel();
+                if (mapClustrToPar.find(sSmaller) == mapClustrToPar.end() || mapClustrToPar[sSmaller].size() > sLarger.size())
+                {
+                    mapClustrToPar.erase(sSmaller);
+                    mapClustrToPar.insert(map<set<int>, set<int>>::value_type(sSmaller, sLarger));
+                }
+            }
+        }
+    }
 
-                // YW: TBD change
-                nameToUse.empty();
+    // loop until there is only a single subset
+    while (setSubsetsActive.size() > 1)
+    {
+        set<set<int>> setSubsetsActiveNext = setSubsetsActive;
+        //cout << "Current active sets: \n";
+        //for( set< set<int> > :: const_iterator it = setSubsetsActiveNext.begin(); it != setSubsetsActiveNext.end(); ++it )
+        //{
+        //DumpIntSet( *it );
+        //}
+        // try to find several clusters that have the same parent cluster
+        // try to see whether we can create new nodes
+        map<set<int>, set<set<int>>> mapClusterCoal;
+        for (set<set<int>>::iterator it1 = setSubsetsActive.begin(); it1 != setSubsetsActive.end(); ++it1)
+        {
+            // get parent
+            YW_ASSERT_INFO(mapClustrToPar.find(*it1) != mapClustrToPar.end(), "Cluster: not found");
+            if (mapClusterCoal.find(mapClustrToPar[*it1]) == mapClusterCoal.end())
+            {
+                set<set<int>> sempty;
+                mapClusterCoal.insert(map<set<int>, set<set<int>>>::value_type(mapClustrToPar[*it1], sempty));
             }
+            //cout << "Having child cluster: ";
+            //DumpIntSet( mapClustrToPar[*it1] );
+            //cout << ", for child ";
+            //DumpIntSet(*it1);
+            mapClusterCoal[mapClustrToPar[*it1]].insert(*it1);
         }
-#endif
-    const char *name = nameToUse.c_str();
-
-    // 		char name[100];
-    //       if( pn->IsLeaf() == false)
-    //        {
-    //		    name[0] = 'v';
-    //		    sprintf(&name[1], "%d", pn->id);
-    //        }
-    //        else
-    //        {
-    // For leaf, we simply output their value (row number)
-    //            sprintf(name, "%d", pn->nodeValues[0] );        // CAUTION,
-    //            here we assume each leaf has exactly 1 label
-    //        }
-    OutputQuotedString(outFile, name);
-    outFile << endl;
-
-    // See if we need special shape here
-    if (pn->GetShape() == PHY_TN_RECTANGLE) {
-      outFile << "vgj [ \n shape  ";
-      OutputQuotedString(outFile, "Rectangle");
-      outFile << "\n]\n";
-    } else {
-      outFile << "defaultAtrribute   1\n";
-    }
-
-    outFile << "]\n";
-
-    // Now try to get more nodes
-    for (int i = 0; i < (int)pn->listChildren.size(); ++i) {
-      nodesStack.push(pn->listChildren[i]);
-    }
-    // cout << "a.1.2\n";
-  }
-  // cout << "a.1.3\n";
-
-  // Now output all the edges, by again starting from root and output all nodes
-  YW_ASSERT(nodesStack.empty() == true);
-  if (rootNode != NULL) {
-    nodesStack.push(rootNode);
-  }
-  while (nodesStack.empty() == false) {
-    TreeNode *pn = nodesStack.top();
-    nodesStack.pop();
-
-    for (int i = 0; i < (int)pn->listChildren.size(); ++i) {
-
-      // cout << "Output an edge \n";
-      outFile << "edge [\n";
-      outFile << "source " << pn->id << endl;
-      outFile << "target  " << pn->listChildren[i]->id << endl;
-      outFile << "label ";
-      if (pn->listEdgeLabels[i].size() > 0) {
-        string lblName;
-        char name[100];
-        //		        name[0] = 'e';
-        for (int iel = 0; iel < (int)pn->listEdgeLabels[i].size(); ++iel) {
-          sprintf(name, "e%d  ", pn->listEdgeLabels[i][iel]);
-          lblName += name;
-        }
-        OutputQuotedString(outFile, lblName.c_str());
-      } else {
-        OutputQuotedString(outFile, "");
-      }
-      outFile << "\n";
-      outFile << "]\n";
-
-      // Store next one to stack
-      nodesStack.push(pn->listChildren[i]);
-    }
-  }
-
-  // Finally quite after closing file
-  outFile << "\n]\n";
-  outFile.close();
-}
-
-void PhylogenyTreeBasic ::OutputGMLNoLabel(const char *inFileName) {
-  //
-  // Now output a file in GML format
-  // First create a new name
-  string name = inFileName;
-  // cout << "num edges = " << listEdges.size() << endl;
-
-  DEBUG("FileName=");
-  DEBUG(name);
-  DEBUG("\n");
-  // Now open file to write out
-  ofstream outFile(name.c_str());
-
-  // First output some header info
-  outFile << "graph [\n";
-  outFile << "comment ";
-  OutputQuotedString(outFile, "Automatically generated by Graphing tool");
-  outFile << "\ndirected  1\n";
-  outFile << "id  1\n";
-  outFile << "label ";
-  OutputQuotedString(outFile, "Phylogeny Tree....\n");
-
-  // Now output all the vertices
-  //	int i;
-  stack<TreeNode *> nodesStack;
-  if (rootNode != NULL) {
-    nodesStack.push(rootNode);
-  }
-  // cout << "a.1.1\n";
-  while (nodesStack.empty() == false) {
-    TreeNode *pn = nodesStack.top();
-    nodesStack.pop();
-
-    outFile << "node [\n";
-
-    outFile << "id " << pn->id << endl;
-    outFile << "label ";
-    string nameToUse = " ";
-    const char *name = nameToUse.c_str();
-
-    // 		char name[100];
-    //       if( pn->IsLeaf() == false)
-    //        {
-    //		    name[0] = 'v';
-    //		    sprintf(&name[1], "%d", pn->id);
-    //        }
-    //        else
-    //        {
-    // For leaf, we simply output their value (row number)
-    //            sprintf(name, "%d", pn->nodeValues[0] );        // CAUTION,
-    //            here we assume each leaf has exactly 1 label
-    //        }
-    OutputQuotedString(outFile, name);
-    outFile << endl;
-
-    // See if we need special shape here
-    if (pn->GetShape() == PHY_TN_RECTANGLE) {
-      outFile << "vgj [ \n shape  ";
-      OutputQuotedString(outFile, "Rectangle");
-      outFile << "\n]\n";
-    } else {
-      outFile << "defaultAtrribute   1\n";
-    }
-
-    outFile << "]\n";
-
-    // Now try to get more nodes
-    for (int i = 0; i < (int)pn->listChildren.size(); ++i) {
-      nodesStack.push(pn->listChildren[i]);
-    }
-    // cout << "a.1.2\n";
-  }
-  // cout << "a.1.3\n";
-
-  // Now output all the edges, by again starting from root and output all nodes
-  YW_ASSERT(nodesStack.empty() == true);
-  if (rootNode != NULL) {
-    nodesStack.push(rootNode);
-  }
-  while (nodesStack.empty() == false) {
-    TreeNode *pn = nodesStack.top();
-    nodesStack.pop();
-
-    for (int i = 0; i < (int)pn->listChildren.size(); ++i) {
-
-      // cout << "Output an edge \n";
-      outFile << "edge [\n";
-      outFile << "source " << pn->id << endl;
-      outFile << "target  " << pn->listChildren[i]->id << endl;
-      outFile << "label ";
-      if (pn->listEdgeLabels[i].size() > 0) {
-        string lblName;
-        char name[100];
-        //		        name[0] = 'e';
-        for (int iel = 0; iel < (int)pn->listEdgeLabels[i].size(); ++iel) {
-          sprintf(name, "e%d  ", pn->listEdgeLabels[i][iel]);
-          lblName += name;
-        }
-        OutputQuotedString(outFile, lblName.c_str());
-      } else {
-        OutputQuotedString(outFile, "");
-      }
-      outFile << "\n";
-      outFile << "]\n";
-
-      // Store next one to stack
-      nodesStack.push(pn->listChildren[i]);
-    }
-  }
-
-  // Finally quite after closing file
-  outFile << "\n]\n";
-  outFile.close();
-}
 
-// construct a newick string for this tree
-void PhylogenyTreeBasic ::ConsNewick(string &strNewick, bool wGridLen,
-                                     double gridWidth, bool fUseCurLbl) {
-  strNewick.empty();
-
-  // work from this node
-  YW_ASSERT_INFO(rootNode != NULL, "Root is not set");
-  strNewick =
-      ConsNewickTreeNode(rootNode, wGridLen, gridWidth, fUseCurLbl, false);
-}
-
-void PhylogenyTreeBasic ::ConsNewickSorted(string &strNewick, bool wGridLen,
-                                           double gridWidth, bool fUseCurLbl) {
-  strNewick.empty();
-
-  // work from this node
-  YW_ASSERT_INFO(rootNode != NULL, "Root is not set");
-  strNewick =
-      ConsNewickTreeNode(rootNode, wGridLen, gridWidth, fUseCurLbl, true);
-}
-
-void PhylogenyTreeBasic ::ConsNewickEdgeLabel(string &strNewick) {
-  strNewick.empty();
-
-  // work from this node
-  YW_ASSERT_INFO(rootNode != NULL, "Root is not set");
-  strNewick = ConsNewickTreeNode(rootNode, false, 1.0, true, true, true);
-}
-
-string PhylogenyTreeBasic ::ConsNewickTreeNode(TreeNode *pNode, bool wGridLen,
-                                               double gridWidth,
-                                               bool fUseCurLbl, bool fSort,
-                                               bool fEdgeLbel) {
-  // cout << "--------------------------------In ConsNewickTreeNode: I am
-  // here\n";
-  string resNodeStr;
-  // Is this node a leaf? If so, we output the label of it
-  if (pNode->IsLeaf() == true) {
-    // Add this label if this label is not there
-    string tmpstr = pNode->GetUserLabel();
-    if (fUseCurLbl == true) {
-      tmpstr = pNode->GetLabel();
-    }
-    resNodeStr = tmpstr;
-  } else {
-    string tmpstr = pNode->GetLabel();
-    YW_ASSERT_INFO(pNode->listChildren.size() >= 1,
-                   "Must have some children here.");
-
-    // When there is only one child and no self-label
-    if (tmpstr.size() <= 2 && pNode->listChildren.size() == 1) {
-      resNodeStr = ConsNewickTreeNode(pNode->listChildren[0], wGridLen,
-                                      gridWidth, fUseCurLbl, fSort, fEdgeLbel);
-    } else {
-
-      // Otherwise, we simply collect all sub strings here, and sepearate by a ,
-      string comboStrName = "(";
-
-      bool fAddSep = false;
-      // does this node has a label by itself? if so, output it
-      if (tmpstr.size() > 2) {
-        comboStrName += tmpstr.substr(1, tmpstr.size() - 2);
-        // comboStrName += ",";
-
-        // all others should be added sep.
-        fAddSep = true;
-      }
-
-      // handle its children
-      if (fSort == false) {
-        for (unsigned int i = 0; i < pNode->listChildren.size(); ++i) {
-          string stepRes =
-              ConsNewickTreeNode(pNode->listChildren[i], wGridLen, gridWidth,
-                                 fUseCurLbl, fSort, fEdgeLbel);
-
-          if (stepRes.size() > 0) {
-            if (fAddSep == true) {
-              comboStrName += ",";
+        // now process each record
+        for (map<set<int>, set<set<int>>>::iterator it2 = mapClusterCoal.begin(); it2 != mapClusterCoal.end(); ++it2)
+        {
+            //YW_ASSERT_INFO( it2->second.size() > 1, "Must have at least two coalescing" );
+            //cout << "Set parent: ";
+            //DumpIntSet(it2->first);
+            set<int> sunion;
+            for (set<set<int>>::iterator it3 = it2->second.begin(); it3 != it2->second.end(); ++it3)
+            {
+                //cout << "Set child: ";
+                //DumpIntSet(*it3);
+                // can these two coalesce into a single cluster known
+                UnionSets(sunion, *it3);
+            }
+            //cout << "sunion = ";
+            //DumpIntSet( sunion );
+            // ensure these do coal into some meaningful cluster
+            if (setClusters.find(sunion) == setClusters.end())
+            {
+                //cout << "This set not complete\n";
+                // this cluster not done yet
+                continue;
             }
 
-            comboStrName += stepRes;
+            // create this new node
+            TreeNode *pnode = new TreeNode;
+            nodeLast = pnode;
+            for (set<set<int>>::iterator it3 = it2->second.begin(); it3 != it2->second.end(); ++it3)
+            {
+                //cout << "Processing first subset: ";
+                //DumpIntSet( *it1 );
+                //cout << "Processing second subset: ";
+                //DumpIntSet( *it2 );
+                // these two add up to an input cluster and so create a new node for it
+                YW_ASSERT_INFO(mapClusterToNode.find(*it3) != mapClusterToNode.end(), "Fail1");
+                vector<int> emptyLabels;
+                pnode->AddChild(mapClusterToNode[*it3], emptyLabels);
+                setSubsetsActiveNext.erase(*it3);
+            }
+            mapClusterToNode.insert(map<set<int>, TreeNode *>::value_type(sunion, pnode));
+            setSubsetsActiveNext.insert(sunion);
+            //cout << "Creating node: " << endl;
+        }
+        // must make progress
+        YW_ASSERT_INFO(setSubsetsActive != setSubsetsActiveNext, "Did not make progress");
+        setSubsetsActive = setSubsetsActiveNext;
+    }
+    YW_ASSERT_INFO(nodeLast != NULL, "nodeLast: NULL");
+    SetRoot(nodeLast);
+}
 
-            // from now on, add sep
-            fAddSep = true;
+// find the set of clades in the subtree specified by the given leaf nodes
+void PhylogenyTreeBasic ::FindCladeOfSubsetLeaves(const set<TreeNode *> &setLeaves, set<set<TreeNode *>> &setSubtreeClades)
+{
+    // caution: do not check whether these are true leaves
+    TreeNode *pRoot = this->GetRoot();
+    set<TreeNode *> setAllNodes;
+    pRoot->GetAllDescendents(setAllNodes);
 
-            // if( i+1 < pNode->listChildren.size() )
-            //{
-            //    comboStrName += ",";
-            //}
-          }
-        }
-      } else {
-        // sort the labels from children
-        multiset<string> strsChildren;
-        for (unsigned int i = 0; i < pNode->listChildren.size(); ++i) {
-          string stepRes =
-              ConsNewickTreeNode(pNode->listChildren[i], wGridLen, gridWidth,
-                                 fUseCurLbl, fSort, fEdgeLbel);
-          if (stepRes.size() > 0) {
-            strsChildren.insert(stepRes);
-          }
-        }
-        for (multiset<string>::iterator it = strsChildren.begin();
-             it != strsChildren.end(); ++it) {
-          //
-          if (fAddSep == true) {
-            comboStrName += ",";
-          }
-
-          comboStrName += *it;
-
-          // from now on, add sep
-          fAddSep = true;
-        }
-      }
-      comboStrName += ")";
-      // cout << "comboStrName = " << comboStrName << endl;
-      resNodeStr = comboStrName;
-    }
-  }
-
-  // now see if we need to add length info
-  //
-  if (wGridLen == true) {
     //
-    TreeNode *pNodePar = pNode->GetParent();
-    if (pNodePar != NULL) {
-      double len = gridWidth * (pNodePar->GetLevel() - pNode->GetLevel());
-      // cout << "**************************PhylogenyTreeBasic::len = " << len
-      // << endl;
-      char buf[100];
-      sprintf(buf, ":%f", len);
-      resNodeStr += buf;
-    }
-  } else if (pNode->GetLength() >= 0.0) {
-#if 0
-        // if length is set, add it
-        resNodeStr += ":";
-        resNodeStr += ConvToString(pNode->GetLength() );
-#endif
-  }
-
-  if (fEdgeLbel) {
-    TreeNode *pParNode = pNode->GetParent();
-    if (pParNode != NULL) {
-      int cIndex = pParNode->GetChildIndex(pNode);
-
-      // add edge label in the format: s1s2s3....
-      string strEdgeLbel;
-      vector<int> listEdgeLabels;
-      pParNode->GetEdgeLabelsAtBranch(cIndex, listEdgeLabels);
-
-      // cout << "cIndex: " << cIndex <<", listEdgeLabels: ";
-      // DumpIntVec(listEdgeLabels);
-
-      for (int i = 0; i < (int)listEdgeLabels.size(); ++i) {
-        char buf[10000];
-        sprintf(buf, "#%d", listEdgeLabels[i]);
-        strEdgeLbel += buf;
-      }
-      if (strEdgeLbel.length() > 0) {
-        resNodeStr += ":";
-        resNodeStr += strEdgeLbel;
-      }
+    for (set<TreeNode *>::iterator it = setAllNodes.begin(); it != setAllNodes.end(); ++it)
+    {
+        //
+        set<TreeNode *> setLeavesUnder;
+        (*it)->GetAllLeavesUnder(setLeavesUnder);
+        set<TreeNode *> setLeavesSS;
+        JoinSetsGen(setLeavesUnder, setLeaves, setLeavesSS);
+        if (setLeavesSS.size() > 0)
+        {
+            setSubtreeClades.insert(setLeavesSS);
+        }
     }
-  }
+}
+
+// exact variant: keep only clades whose leaves all belong to the given leaf nodes (assuming JoinSetsGen computes the intersection)
+void PhylogenyTreeBasic ::FindCladeOfSubsetLeavesExact(const set<TreeNode *> &setLeaves, set<set<TreeNode *>> &setSubtreeClades)
+{
+    // caution: do not check whether these are true leaves
+    TreeNode *pRoot = this->GetRoot();
+    set<TreeNode *> setAllNodes;
+    pRoot->GetAllDescendents(setAllNodes);
 
-  return resNodeStr;
+    //
+    for (set<TreeNode *>::iterator it = setAllNodes.begin(); it != setAllNodes.end(); ++it)
+    {
+        //
+        set<TreeNode *> setLeavesUnder;
+        (*it)->GetAllLeavesUnder(setLeavesUnder);
+        set<TreeNode *> setLeavesSS;
+        JoinSetsGen(setLeavesUnder, setLeaves, setLeavesSS);
+        if (setLeavesSS == setLeavesUnder)
+        {
+            setSubtreeClades.insert(setLeavesSS);
+        }
+    }
 }
 
-// This function adds a new tree node, and return it. Also set the parent node
-// to the pareamter
-TreeNode *PhylogenyTreeBasic ::AddTreeNode(TreeNode *parNode, int id) {
-  if (id < 0) {
-    id = GetNumVertices();
-  }
+void PhylogenyTreeBasic ::GroupLeavesToSubtrees(const set<TreeNode *> &setLeaves, const set<set<TreeNode *>> &cladeNodesToProc, set<set<TreeNode *>> &setSubtreeClades)
+{
+    // group the leaves into subtrees (i.e. the subtrees contain exactly those that appear in the leaves)
+    // YW: note this is not the most realistic way (say you have one noisy leaf separating two otherwise fully connected caterpillar trees,
+    // then the result will be a lot more trees to use). But this serves as a starting point
+    // YW: here, we are given some subset out of some pre-specified leaf set, and some subsets (clades) over these leaves;
+    // we want to find the set of maximal clades that partition these leaves
+    //TreeNode *pRoot = this->GetRoot();
+    //set<TreeNode *> setAllNodes;
+    //pRoot->GetAllDescendents(setAllNodes);
+
+    // order based on the size
+    map<int, set<set<TreeNode *>>> mapSubtreeSz;
+    //for( set<TreeNode *> :: iterator it = setAllNodes.begin(); it != setAllNodes.end(); ++it)
+    for (set<set<TreeNode *>>::const_iterator it = cladeNodesToProc.begin(); it != cladeNodesToProc.end(); ++it)
+    {
+        //
+        //set<TreeNode *> setLeavesUnder;
+        //(*it)->GetAllLeavesUnder( setLeavesUnder );
+        if (mapSubtreeSz.find(it->size()) == mapSubtreeSz.end())
+        {
+            set<set<TreeNode *>> ss;
+            mapSubtreeSz.insert(map<int, set<set<TreeNode *>>>::value_type(it->size(), ss));
+        }
+        mapSubtreeSz[it->size()].insert(*it);
+    }
 
-  TreeNode *pnode = new TreeNode(id);
-  pnode->AddNodeValue(id);
+    // reverse order
+    set<TreeNode *> setNodesProc = setLeaves;
+    for (map<int, set<set<TreeNode *>>>::reverse_iterator rit = mapSubtreeSz.rbegin(); rit != mapSubtreeSz.rend(); ++rit)
+    {
+        //
+        for (set<set<TreeNode *>>::iterator itg = rit->second.begin(); itg != rit->second.end(); ++itg)
+        {
+            //
+            set<TreeNode *> setLeavesSS;
+            JoinSetsGen(*itg, setNodesProc, setLeavesSS);
+            if (setLeavesSS.size() == itg->size())
+            {
+                // find a good match here, use it
+                setSubtreeClades.insert(*itg);
+                SubtractSetsGen(setNodesProc, *itg);
+            }
+        }
+        if (setNodesProc.size() == 0)
+        {
+            break;
+        }
+    }
+    YW_ASSERT_INFO(setNodesProc.size() == 0, "Fail to classify all subtrees");
+}
 
-  // Should delete the tree
-  if (parNode == NULL) {
-    YW_ASSERT_INFO(
-        rootNode == NULL,
-        "Can not add a node with no parent if the tree is not empty");
-    rootNode = pnode;
-    return pnode;
-  }
-
-  // Otherwise, set the parent
-  SEQUENCE emptySeq;
-  parNode->AddChild(pnode, emptySeq);
-  return pnode;
-}
-
-int PhylogenyTreeBasic ::GetNumVertices() const {
-  int res = 0;
-  stack<TreeNode *> stackNodes;
-  if (rootNode != NULL) {
-    stackNodes.push(rootNode);
-  }
-  while (stackNodes.empty() == false) {
-    TreeNode *pcurr = stackNodes.top();
-    stackNodes.pop();
-    ++res;
-    // Now enque its children
-    for (int i = 0; i < (int)pcurr->listChildren.size(); ++i) {
-      stackNodes.push(pcurr->listChildren[i]);
-    }
-  }
-  return res;
-}
-
-// int PhylogenyTreeBasic :: GetIdFromStr( const string &strPart, TaxaMapper
-// *pTMapper )
-//{
-// cout << "GetIdFromStr: " << strPart << endl;
-//	string strToUse = strPart;
-//	size_t posSeparator = strPart.find( ':' );
-//	if( posSeparator != string::npos )
-//	{
-//		strToUse = strPart.substr(0, (int)posSeparator  );
-//	}
-//	// get rid of
-//	int res = -1;
-//	if( pTMapper == NULL)
-//	{
-//		sscanf( strToUse.c_str(), "%d", &res  );
-// cout << "Empty mapper\n";
-//	}
-//	else
-//	{
-//		// are we reading in the first tree or not
-//		if( pTMapper->IsInitialized() == true )
-//		{
-//			res  = pTMapper->GetId(strToUse);
-// cout << "GetIdFromStr: GetId: " << strToUse << ": " << res << endl;
-//		}
-//		else
-//		{
-//			// this is new
-//			res = pTMapper->AddTaxaString( strToUse );
-// cout << "GetIdFromStr: New id: " << strToUse << ": " << res << endl;
-//		}
-//	}
-//	return res;
-//}
+void PhylogenyTreeBasic ::GroupLeavesToSubtreesSamePar(const set<TreeNode *> &setLeaves, const set<set<TreeNode *>> &cladeNodesToProc, set<set<TreeNode *>> &setSubtreeClades)
+{
+    // group leaves that form subtrees w/ same parents. Difference from above: for two subtrees that share the same parent
+    // but could be on other branches, put them together
+    GroupLeavesToSubtrees(setLeaves, cladeNodesToProc, setSubtreeClades);
+    // now see whether we can combine subtrees s.t. the combined one is still contained in some parent
+    map<set<TreeNode *>, set<TreeNode *>> mapSubtreesToPar;
+    for (set<set<TreeNode *>>::iterator it = setSubtreeClades.begin(); it != setSubtreeClades.end(); ++it)
+    {
+        for (set<set<TreeNode *>>::iterator itg = cladeNodesToProc.begin(); itg != cladeNodesToProc.end(); ++itg)
+        {
+            //
+            if (*itg != *it && itg->size() > it->size() && (mapSubtreesToPar.find(*it) == mapSubtreesToPar.end() || mapSubtreesToPar[*it].size() > itg->size()))
+            {
+                //
+                set<TreeNode *> sint;
+                JoinSetsGen(*itg, *it, sint);
+                if (sint.size() == it->size())
+                {
+                    //
+                    if (mapSubtreesToPar.find(*it) == mapSubtreesToPar.end())
+                    {
+                        mapSubtreesToPar.insert(map<set<TreeNode *>, set<TreeNode *>>::value_type(*it, *itg));
+                    }
+                    else
+                    {
+                        mapSubtreesToPar[*it] = *itg;
+                    }
+                }
+            }
+        }
+    }
+    map<set<TreeNode *>, set<TreeNode *>> mapRevParToSubtrees;
+    for (map<set<TreeNode *>, set<TreeNode *>>::iterator it = mapSubtreesToPar.begin(); it != mapSubtreesToPar.end(); ++it)
+    {
+        //
+        if (mapRevParToSubtrees.find(it->second) == mapRevParToSubtrees.end())
+        {
+            mapRevParToSubtrees.insert(map<set<TreeNode *>, set<TreeNode *>>::value_type(it->second, it->first));
+        }
+        else
+        {
+            UnionSetsGen(mapRevParToSubtrees[it->second], it->first);
+        }
+    }
+    setSubtreeClades.clear();
+    for (map<set<TreeNode *>, set<TreeNode *>>::iterator it = mapRevParToSubtrees.begin(); it != mapRevParToSubtrees.end(); ++it)
+    {
+        setSubtreeClades.insert(it->second);
+    }
+}
 
-TreeNode *PhylogenyTreeBasic ::ConsOnNewickSubtree(const string &nwStringPart,
-                                                   int &leafId, int &invId,
-                                                   int numLeaves,
-                                                   bool fBottomUp,
-                                                   TaxaMapper *pTMapper) {
-  // cout << "Entry nwStringPart = "<< nwStringPart << endl;
-
-  TreeNode *pres = NULL;
-  int posLenBegin = -1;
-
-  // this function builds recursively subtrees for this part of string
-  // First, is this string a leaf or not
-  if (nwStringPart[0] != '(') {
-    // TreeNode *pLeaf = new TreeNode( nodeId  );
-    //// also set its label this way
-    // pLeaf->AddNodeValue( nodeId );
-
-    // 7/27/10 YW: for now, we take this convention:
-    // tree node id = label  if no mapper is passed
-    // Why? This case is by default for internal use only
-    // while mapper is used for external (user) specified
-    // Yes, this is a leaf
-    int nodeId = TaxaMapper ::GetIdFromStr(nwStringPart, pTMapper);
-    //	sscanf( nwStringPart.c_str(), "%d", &nodeId  );
-
-    if (numLeaves > 0) {
-      if (nodeId >= numLeaves) {
-        cout << "Wrong: nodeId = " << nodeId << ", numLeaves = " << numLeaves
-             << endl;
-      }
-      YW_ASSERT_INFO(nodeId < numLeaves,
-                     "We assume in phylogeny tree, leaf id starts from 0");
-    }
-    // cout << "node id = " << nodeId << endl;
-
-    int idtouse = leafId;
-    if (pTMapper == NULL) {
-      // in this case take the same as node id
-      idtouse = nodeId;
-    } else {
-      // update leafid since we are using it
-      leafId++;
-    }
-
-    TreeNode *pLeaf = new TreeNode(idtouse);
-    // also set its label this way
-    pLeaf->AddNodeValue(idtouse);
-    // leafId ++;
-
-    // get rid of any part after : if there is length info
-    // string strLeafLabel = nwStringPart;
-    // if( strLa )
-    //{
-    //}
-    string strLbl = GetStringFromId(nodeId);
-    pLeaf->SetLabel(strLbl);
-
-    string strLblUser = TaxaMapper ::ExtractIdPartFromStr(nwStringPart);
-    pLeaf->SetUserLabel(strLblUser);
-
-    // cout << "ConsOnNewickSubtree: set leaf label: " << strLbl << endl;
-    // return pLeaf;
-    pres = pLeaf;
-
-    size_t posLenSep = nwStringPart.find(':');
-    if (posLenSep != string::npos) {
-      //
-      posLenBegin = posLenSep + 1;
-    }
-  } else {
-    // This is not a leaf
-    // so we create underlying level for it
-    int idToUse = 1000;
-    if (fBottomUp == false) {
-      idToUse = invId++;
-    }
-    TreeNode *pInternal = new TreeNode(idToUse);
-    int lastpos = 1;
-    int curpos = 0;
-    int parnet = 0; // (: +1, ) -1
-    while (true) {
-      // cout << "curpos = " << curpos << endl;
-
-      if (curpos >= (int)nwStringPart.size()) {
-        // we are done
-        break;
-      }
-
-      // keep balance
-      if (nwStringPart[curpos] == '(') {
-        parnet++;
-      } else if (nwStringPart[curpos] == ')') {
-        parnet--;
-
-        // when parnet = 0, we know we end
-        if (parnet == 0) {
-          // now adding the last piece
-          // create a new node
-          int strl = curpos - lastpos;
-          string subs = nwStringPart.substr(lastpos, strl);
-          //    cout << "last subs = " << subs << endl;
-          TreeNode *pChild = ConsOnNewickSubtree(subs, leafId, invId, numLeaves,
-                                                 fBottomUp, pTMapper);
-
-          // also append it as child
-          vector<int> empytLabels;
-          pInternal->AddChild(pChild, empytLabels);
-
-          // aslo update lastpos
-          lastpos = curpos + 1;
-        }
-
-      } else if (nwStringPart[curpos] == ',') {
-        // Yes, this is a sepeartor, but we only start to process it when the
-        // balance of parenetnis is right
-        if (parnet == 1) {
-          // create a new node
-          int strl = curpos - lastpos;
-          string subs = nwStringPart.substr(lastpos, strl);
-          //    cout << "subs = " << subs << endl;
-          TreeNode *pChild = ConsOnNewickSubtree(subs, leafId, invId, numLeaves,
-                                                 fBottomUp, pTMapper);
-
-          // also append it as child
-          vector<int> empytLabels;
-          pInternal->AddChild(pChild, empytLabels);
-
-          // aslo update lastpos
-          lastpos = curpos + 1;
-        }
-      } else if (nwStringPart[curpos] == ':') {
-        // keep track of length
-        if (parnet == 0) {
-          posLenBegin = curpos + 1;
-        }
-      }
-
-      // now move to next pos
-      curpos++;
-    }
-
-    // if we go bottom up labeling the node, we should re-label the node here
-    if (fBottomUp == true) {
-      pInternal->SetID(invId++);
-    }
-    // return pInternal;
-    pres = pInternal;
-  }
-
-  //
-  if (posLenBegin >= 0) {
-    // also read in length
-    size_t posRightExt = nwStringPart.find(')', posLenBegin);
-    int rightPos = (int)nwStringPart.size() - 1;
-    if (posRightExt != string::npos) {
-      rightPos = posRightExt - 1;
-    }
-    string subs =
-        nwStringPart.substr(posLenBegin, posRightExt - posLenBegin + 1);
-    double len = StrToDouble(subs);
-    pres->SetLength(len);
-  }
-  return pres;
-}
-
-TreeNode *PhylogenyTreeBasic ::ConsOnNewickSubtreeDupLabels(
-    const string &nwStringPart, int &invId, int &leafId, TaxaMapper *pTMapper) {
-  // cout << "Entry nwStringPart = "<< nwStringPart << endl;
-
-  // this function builds recursively subtrees for this part of string
-  // First, is this string a leaf or not
-  if (nwStringPart[0] != '(') {
-    // ensure no internal has every been set yet
-    // YW_ASSERT_INFO( invId < 0, "invId should not be set when leaf is being
-    // processed" );
-
-    // Yes, this is a leaf
-    int nodeId = leafId;
-    leafId++;
-    int leafLabel = TaxaMapper ::GetIdFromStr(nwStringPart, pTMapper);
-    // sscanf( nwStringPart.c_str(), "%d", &leafLabel  );
-
-    // cout << "leaf id = " << nodeId << endl;
-    TreeNode *pLeaf = new TreeNode(nodeId);
-    // also set its label this way
-    pLeaf->AddNodeValue(nodeId);
-
-    // get rid of any part after : if there is length info
-    // string strLeafLabel = nwStringPart;
-    // if( strLa )
-    //{
-    //}
-    char buf[1000];
-    sprintf(buf, "%d", leafLabel);
-    string strLabel = buf;
-    pLeaf->SetLabel(strLabel);
-
-    string strLabelUser = TaxaMapper ::ExtractIdPartFromStr(nwStringPart);
-    pLeaf->SetUserLabel(strLabelUser);
-
-    // cout << "ConsOnNewickSubtree: set leaf label: " << strLabel << endl;
-    return pLeaf;
-  } else {
-
-    // This is not a leaf
-    // so we create underlying level for it
-    int idToUse = invId;
-    TreeNode *pInternal = new TreeNode(idToUse);
-    int lastpos = 1;
-    int curpos = 0;
-    int parnet = 0; // (: +1, ) -1
-    while (true) {
-      // cout << "curpos = " << curpos << endl;
-
-      if (curpos >= (int)nwStringPart.size()) {
-        // we are done
-        break;
-      }
-
-      // keep balance
-      if (nwStringPart[curpos] == '(') {
-        parnet++;
-      } else if (nwStringPart[curpos] == ')') {
-        parnet--;
-
-        // when parnet = 0, we know we end
-        if (parnet == 0) {
-          // now adding the last piece
-          // create a new node
-          int strl = curpos - lastpos;
-          string subs = nwStringPart.substr(lastpos, strl);
-          //    cout << "last subs = " << subs << endl;
-          TreeNode *pChild =
-              ConsOnNewickSubtreeDupLabels(subs, invId, leafId, pTMapper);
-
-          // also append it as child
-          vector<int> empytLabels;
-          pInternal->AddChild(pChild, empytLabels);
-
-          // aslo update lastpos
-          lastpos = curpos + 1;
-        }
-
-      } else if (nwStringPart[curpos] == ',') {
-        // Yes, this is a sepeartor, but we only start to process it when the
-        // balance of parenetnis is right
-        if (parnet == 1) {
-          // create a new node
-          int strl = curpos - lastpos;
-          string subs = nwStringPart.substr(lastpos, strl);
-          //    cout << "subs = " << subs << endl;
-          TreeNode *pChild =
-              ConsOnNewickSubtreeDupLabels(subs, invId, leafId, pTMapper);
-
-          // also append it as child
-          vector<int> empytLabels;
-          pInternal->AddChild(pChild, empytLabels);
-
-          // aslo update lastpos
-          lastpos = curpos + 1;
-        }
-      }
-
-      // now move to next pos
-      curpos++;
-    }
-
-    // if we go bottom up labeling the node, we should re-label the node here
-    // if(invId < 0 )
-    //{
-    //	invId = leafId;
-    //}
+void PhylogenyTreeBasic ::GetAllClades(set<set<int>> &setClades)
+{
+    //
+    setClades.clear();
+    // walk the nodes of the tree and collect, for each one, the int labels of the leaves under it
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        if (pn == NULL)
+        {
+            break; // done with all nodes
+        }
+        //cout << "node id = " << pn->GetID() << endl;
+        set<TreeNode *> setDescendents;
+        pn->GetAllLeavesUnder(setDescendents);
+        set<int> sint;
+        for (set<TreeNode *>::iterator itg = setDescendents.begin(); itg != setDescendents.end(); ++itg)
+        {
+            sint.insert((*itg)->GetIntLabel());
+        }
+        setClades.insert(sint);
+    }
+}
 
-    pInternal->SetID(invId++);
-    // cout << "Set internal node to " << pInternal->GetID() << endl;
-    return pInternal;
-  }
+void PhylogenyTreeBasic ::GetAllCladesList(vector<set<int>> &listClades)
+{
+    listClades.clear();
+    // walk the nodes of the tree and collect, for each one, the int labels of the leaves under it
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        if (pn == NULL)
+        {
+            break; // done with all nodes
+        }
+        //cout << "node id = " << pn->GetID() << endl;
+        set<TreeNode *> setDescendents;
+        pn->GetAllLeavesUnder(setDescendents);
+        set<int> sint;
+        for (set<TreeNode *>::iterator itg = setDescendents.begin(); itg != setDescendents.end(); ++itg)
+        {
+            sint.insert((*itg)->GetIntLabel());
+        }
+        listClades.push_back(sint);
+    }
 }
 
-// Get nodes info
-// 7/27/10: we want to get node label (NOT id!)
-void PhylogenyTreeBasic ::GetNodeParInfo(vector<int> &nodeIds,
-                                         vector<int> &parPos) {
-  // cout << "GetNodeParInfo: \n";
-  // simply put consecutive node ids but keep track of node parent positions
-  // ensure we get the correct node mapping between id and pointer to node
-  map<TreeNode *, int> mapNodeIds;
-
-  // id is simply consecutive
-  int numTotVerts = GetNumVertices();
-  nodeIds.resize(numTotVerts);
-  for (int i = 0; i < numTotVerts; ++i) {
-    nodeIds[i] = i;
-  }
-  parPos.resize(numTotVerts);
-  for (int i = 0; i < numTotVerts; ++i) {
-    parPos[i] = -1;
-  }
-
-  // IMPORTANT: assume binary tree, otherwise all bets are off!!!!
-  // int numLeaves = ( numTotVerts+1 )/2;
-  int numLeaves = GetNumLeaves();
-  // cout << "numLeaves: " << numLeaves << endl;
-  // do traversal
-  int curNodeNum = 0;
-  // InitPostorderWalk();
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    // TreeNode *pn = NextPostorderWalk( ) ;
-    if (pn == NULL) {
-      // cout << "No node here. Stop.\n";
-      break; // done with all nodes
+// different from the above, (1) we allow duplicate int-labels (and thus multiset)
+// (2) group clades by common parents
+void PhylogenyTreeBasic ::GetAllCladeGroupsIntLabel(multiset<multiset<multiset<int>>> &setCladeGroupsDupLabels, multiset<int> &rootClade)
+{
+    // group all clades by parent nodes (i.e. clades with same parent are in one class)
+    // root clade: the one with all leaves
+    map<TreeNode *, multiset<multiset<int>>> mapCladeGroupsForNode;
+
+    //
+    setCladeGroupsDupLabels.clear();
+    rootClade.clear();
+    // walk the nodes of the tree and collect, for each one, the int labels of the leaves under it
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        if (pn == NULL)
+        {
+            break; // done with all nodes
+        }
+        //cout << "node id = " << pn->GetID() << endl;
+        set<TreeNode *> setDescendents;
+        pn->GetAllLeavesUnder(setDescendents);
+        multiset<int> sint;
+        for (set<TreeNode *>::iterator itg = setDescendents.begin(); itg != setDescendents.end(); ++itg)
+        {
+            sint.insert((*itg)->GetIntLabel());
+        }
+        TreeNode *pnPar = pn->GetParent();
+        if (pnPar == NULL)
+        {
+            // this is the root clade
+            rootClade = sint;
+        }
+        else
+        {
+            if (mapCladeGroupsForNode.find(pnPar) == mapCladeGroupsForNode.end())
+            {
+                multiset<multiset<int>> mms;
+                mapCladeGroupsForNode.insert(map<TreeNode *, multiset<multiset<int>>>::value_type(pnPar, mms));
+            }
+            mapCladeGroupsForNode[pnPar].insert(sint);
+        }
+    }
+    YW_ASSERT_INFO(rootClade.size() > 0, "Fail to collect root clade");
+    for (map<TreeNode *, multiset<multiset<int>>>::iterator it = mapCladeGroupsForNode.begin(); it != mapCladeGroupsForNode.end(); ++it)
+    {
+        //
+        setCladeGroupsDupLabels.insert(it->second);
     }
+}
 
+void PhylogenyTreeBasic ::GetAllCladesById(set<set<int>> &setClades)
+{
     //
-    if (pn->IsLeaf() == true) {
-      // skip it for now
-      continue;
+    setClades.clear();
+    // visit every node and collect the set of leaf ids under it
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        if (pn == NULL)
+        {
+            break; // done with all nodes
+        }
+        //cout << "node id = " << pn->GetID() << endl;
+        set<TreeNode *> setDescendents;
+        pn->GetAllLeavesUnder(setDescendents);
+        set<int> sint;
+        for (set<TreeNode *>::iterator itg = setDescendents.begin(); itg != setDescendents.end(); ++itg)
+        {
+            sint.insert((*itg)->GetID());
+        }
+        setClades.insert(sint);
     }
+}
 
+void PhylogenyTreeBasic ::GetAllCladeNodess(set<set<TreeNode *>> &setClades)
+{
     //
-    int nonleafInd = numLeaves + curNodeNum;
-    curNodeNum++;
-    // remember it
-    mapNodeIds.insert(map<TreeNode *, int>::value_type(pn, nonleafInd));
-    // now set its descendents to this index, either leaf or non-leaf
-    // if it is non-leaf, do a lookup of the stored id. Leaf: just go by its id
-    for (int jj = 0; jj < pn->GetChildrenNum(); ++jj) {
-      TreeNode *pnjj = pn->GetChild(jj);
-      int pnjjid;
-      int pnjjlabel = -1;
-      if (pnjj->IsLeaf() == true) {
-        pnjjid = pnjj->GetID();
-        // assume id is distinct, while label can be duplicate
-        pnjjlabel = pnjj->GetIntLabel();
-        // cout << "pnjjid = " << pnjjid << ", pnjjlabel: " << pnjjlabel << ",
-        // numLeaves: " << numLeaves << endl;
-        YW_ASSERT_INFO(pnjjid >= 0 && pnjjid < numLeaves,
-                       "Leaf id: out of range");
-      } else {
-        YW_ASSERT_INFO(mapNodeIds.find(pnjj) != mapNodeIds.end(),
-                       "Fail to find the node");
-        pnjjid = mapNodeIds[pnjj];
-      }
-      parPos[pnjjid] = nonleafInd;
-      // this says whether we change the label of the node
-      // this is needed when there are duplicate labels in the tree
-      if (pnjjlabel >= 0) {
-        nodeIds[pnjjid] = pnjjlabel;
-      }
-    }
-  }
-
-  // print out
-  // cout << "original tree:  ";
-  // string strTree;
-  // ConsNewick(strTree);
-  // cout << strTree << endl;
-  // cout << "Parent position : ";
-  // DumpIntVec( parPos );
-}
-
-void PhylogenyTreeBasic ::GetNodeParInfoNew(vector<int> &nodeIds,
-                                            vector<int> &parPos) {
-  // cout << "In GetNodeParInfoNew: tree is: ";
-  // this->Dump();
-  // the previous version has various of problems, but it is being used by some
-  // programs so I decide to add a new function Note this one assume all nodes
-  // are labeled consecutively simply put consecutive node ids but keep track of
-  // node parent positions ensure we get the correct node mapping between id and
-  // pointer to node
-  // map<TreeNode *,int> mapNodeIds;
-
-  // id is simply consecutive
-  int numTotVerts = GetNumVertices();
-  // nodeIds.resize(numTotVerts);
-  // for(int i=0; i<numTotVerts; ++i)
-  //{
-  //	nodeIds[i] = i;
-  //}
-  // parPos.resize(numTotVerts);
-  // for(int i=0; i<numTotVerts; ++i)
-  //{
-  //	parPos[i] = -1;
-  //}
-
-  // IMPORTANT: assume binary tree, otherwise all bets are off!!!!
-  // int numLeaves = ( numTotVerts+1 )/2;
-  int numLeaves = GetNumLeaves();
-  // cout << "Numleaves = " << numLeaves << endl;
-  // do traversal
-  // int curNodeNum = 0;
-  // InitPostorderWalk();
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    // TreeNode *pn = NextPostorderWalk( ) ;
-    if (pn == NULL) {
-      // cout << "No node here. Stop.\n";
-      break; // done with all nodes
+    setClades.clear();
+    // visit every node and collect the set of leaf nodes under it
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        if (pn == NULL)
+        {
+            break; // done with all nodes
+        }
+        //cout << "node id = " << pn->GetID() << endl;
+        set<TreeNode *> setDescendents;
+        pn->GetAllLeavesUnder(setDescendents);
+
+        setClades.insert(setDescendents);
+    }
+}
+
+TreeNode *PhylogenyTreeBasic ::GetSubtreeRootForLeaves(const set<TreeNode *> &setLvNodes)
+{
+    PhylogenyTreeIterator itorTree(*this);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        itorTree.Next();
+        if (pn == NULL)
+        {
+            break; // done with all nodes
+        }
+        //cout << "node id = " << pn->GetID() << endl;
+        set<TreeNode *> setDescendents;
+        pn->GetAllLeavesUnder(setDescendents);
+
+        if (setLvNodes == setDescendents)
+        {
+            return pn;
+        }
     }
+    return NULL;
+}
 
+void PhylogenyTreeBasic ::GroupNodesWithCommonPars(const set<TreeNode *> &setNodes, map<TreeNode *, set<TreeNode *>> &mapNodesWithSamePar)
+{
     //
-    int curNodeId = pn->GetID();
-    // cout << "curNodeId: " << curNodeId << endl;
-    YW_ASSERT_INFO(
-        curNodeId < numTotVerts,
-        "curNodeId exceeds limit (the node ids must be consecutive from 0)");
-    if (pn->IsLeaf() == true) {
-      // skip it for now
-      YW_ASSERT_INFO(
-          curNodeId < numLeaves,
-          "The tree violates assumption that tree leaf id start from 0");
-    }
-
-    // add a record
-    nodeIds.push_back(pn->GetID());
-    TreeNode *pnPar = pn->GetParent();
-    if (pnPar == NULL) {
-      parPos.push_back(-1);
-    } else {
-      // simply its id
-      parPos.push_back(pnPar->GetID());
-    }
-
-    //	continue;
-    //}
-#if 0
-		//
-		//int nonleafInd = numLeaves + curNodeNum;
-		int nonleafInd = curNodeId;
-		//curNodeNum++;
-		// remember it
-		mapNodeIds.insert( map<TreeNode *,int> :: value_type( pn, curNodeId ) );
-		// now set its descendents to this index, either leaf or non-leaf
-		// if it is non-leaf, do a lookup of the stored id. Leaf: just go by its id
-		for(int jj=0; jj<pn->GetChildrenNum(); ++jj)
-		{
-			TreeNode *pnjj = pn->GetChild(jj);
-			int pnjjid;
-			if( pnjj->IsLeaf() == true )
-			{
-				pnjjid = pnjj->GetID();
-				YW_ASSERT_INFO( pnjjid >=0 && pnjjid < numLeaves, "Leaf id: out of range" );
-			}
-			else
-			{
-				YW_ASSERT_INFO( mapNodeIds.find( pnjj ) != mapNodeIds.end(), "Fail to find the node"  );
-				pnjjid = mapNodeIds[pnjj];
-			}
-			parPos[pnjjid] = nonleafInd;
-#endif
-    //}
-  }
+    mapNodesWithSamePar.clear();
+    for (set<TreeNode *>::const_iterator it = setNodes.begin(); it != setNodes.end(); ++it)
+    {
+        //
+        TreeNode *ppar = (*it)->GetParent();
+        if (mapNodesWithSamePar.find(ppar) == mapNodesWithSamePar.end())
+        {
+            set<TreeNode *> ss;
+            mapNodesWithSamePar.insert(map<TreeNode *, set<TreeNode *>>::value_type(ppar, ss));
+        }
+        mapNodesWithSamePar[ppar].insert(*it);
+    }
+}
 
-  // print out
-  // cout << "original tree:  ";
-  // string strTree;
-  // ConsNewick(strTree);
-  // cout << strTree << endl;
-  // cout << "Parent position : ";
-  // DumpIntVec( parPos );
+void PhylogenyTreeBasic ::RemoveEdgeLabels()
+{
+    //
+    this->rootNode->RemoveLabels();
 }
 
-//
-bool PhylogenyTreeBasic ::ConsOnParPosList(const vector<int> &parPos,
-                                           int numLeaves, bool fBottupUpLabel) {
-  //
-  string strNewick;
-  if (ConvParPosToNewick(parPos, strNewick) == false) {
-    return false;
-  }
-  // cout << "Newick string = " << strNewick << endl;
-  ConsOnNewick(strNewick, numLeaves, fBottupUpLabel);
-  return true;
-}
-
-bool PhylogenyTreeBasic ::ConvParPosToNewick(const vector<int> &parPos,
-                                             string &strNewick) {
-  // convert par position representation to newick
-  // we always assume the last item is -1
-  YW_ASSERT_INFO(parPos[parPos.size() - 1] == -1,
-                 "Must be -1 for the last value in parPos");
-  ConvParPosToNewickSubtree(parPos.size() - 1, parPos, strNewick);
-  return true;
-}
-
-void PhylogenyTreeBasic ::ConvParPosToNewickSubtree(int nodeInd,
-                                                    const vector<int> &parPos,
-                                                    string &strNewick) {
-  // this function generate under a single node (leaf or non-leaf), the newick
-  // under the subtree
-  vector<int> listUnderNodeInds;
-  for (int i = 0; i < (int)parPos.size(); ++i) {
-    if (parPos[i] == nodeInd) {
-      listUnderNodeInds.push_back(i);
-    }
-  }
-  // leaf if empty
-  if (listUnderNodeInds.size() == 0) {
-    char buf[100];
-    sprintf(buf, "%d", nodeInd);
-    strNewick = buf;
-    return;
-  }
-  YW_ASSERT_INFO(listUnderNodeInds.size() == 2,
-                 "Only binary trees are supported for now");
-
-  // now get newick for the two part and merge it
-  string strFirst, strSecond;
-  ConvParPosToNewickSubtree(listUnderNodeInds[0], parPos, strFirst);
-  ConvParPosToNewickSubtree(listUnderNodeInds[1], parPos, strSecond);
-  strNewick = "(";
-  strNewick += strFirst;
-  strNewick += ",";
-  strNewick += strSecond;
-  strNewick += ")";
-}
-
-void PhylogenyTreeBasic ::GetLeaveIds(set<int> &lvids) {
-  lvids.clear();
-
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    if (pn->IsLeaf() == true) {
-      lvids.insert(pn->GetID());
-    }
-  }
-}
-void PhylogenyTreeBasic ::GetLeafIntLabels(set<int> &setIntLabels) {
-  vector<TreeNode *> listLeafNodes;
-  GetAllLeafNodes(listLeafNodes);
-  setIntLabels.clear();
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    setIntLabels.insert(listLeafNodes[i]->GetIntLabel());
-  }
-}
-
-void PhylogenyTreeBasic::GetLeavesIdsWithLabel(const string &label,
-                                               set<int> &lvids) {
-  lvids.clear();
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    // cout << "GetLeavesIdsWithLabel: ";
-    // cout << pn->GetLabel() << endl;
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    if (pn->GetLabel() == label) {
-      lvids.insert(pn->GetID());
-    }
-  }
-}
-
-void PhylogenyTreeBasic ::GetLeavesWithLabels(const set<string> &setLabels,
-                                              set<TreeNode *> &setLvNodes) {
-  //
-  setLvNodes.clear();
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    // cout << "GetLeavesIdsWithLabel: ";
-    // cout << pn->GetLabel() << endl;
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    if (setLabels.find(pn->GetLabel()) != setLabels.end()) {
-      setLvNodes.insert(pn);
-    }
-  }
-}
-
-void PhylogenyTreeBasic ::UpdateIntLabel(const vector<int> &listLabels) {
-  // by assumption, id is from 0 to the following
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    // cout << "node id = " << pn->GetID() << endl;
-
-    YW_ASSERT_INFO(pn->GetID() < (int)listLabels.size(), "Tree id: over limit");
-    int lblInt = listLabels[pn->GetID()];
-    char strbuf[100];
-    sprintf(strbuf, "%d", lblInt);
-    string lblNew = strbuf;
-    pn->SetLabel(lblNew);
-  }
-}
-
-void PhylogenyTreeBasic ::Reroot(TreeNode *pRootDesc) {
-  YW_ASSERT_INFO(pRootDesc != NULL, "Can not take NULL pointer");
-  // if the node is set ot be root, nothing to be done
-  if (pRootDesc == rootNode) {
-    return;
-  }
-  // cout << "pass1\n";
-  // create a new node
-  // vector<int> dummyLbls;
-  TreeNode *pRootNew = new TreeNode(rootNode->GetID());
-  TreeNode *pRootOtherDesc = pRootDesc->GetParent();
-  YW_ASSERT_INFO(pRootOtherDesc != NULL, "TBD");
-  vector<int> lblsNew;
-  // for now, concerntrate the labels without SPLITTING
-  pRootOtherDesc->GetEdgeLabelsToChild(pRootDesc, lblsNew);
-  pRootOtherDesc->RemoveChild(pRootDesc);
-  pRootNew->AddChild(pRootDesc, lblsNew);
-  // cout << "pass2\n";
-  //
-  TreeNode *pCurNode = pRootOtherDesc;
-  TreeNode *pCurNodePar = pRootNew;
-  while (true) {
-    // setup the ancestral relationship
-    YW_ASSERT_INFO(pCurNode != NULL && pCurNodePar != NULL, "Something wrong");
-    // cout << "BEFORE CHANGING...\n";
-    // cout << "pCurNode: label =" << pCurNode->GetLabel() << ", ID = " <<
-    // pCurNode->GetID() << ", num of children " << pCurNode->GetChildrenNum()
-    // << endl; for( int pp=0; pp< pCurNode->GetChildrenNum(); ++pp )
-    //{
-    // cout << "** Child: " << pCurNode->GetChild(pp)->GetID() << endl;
-    //}
-    // cout << "pCurNodePar: label =" << pCurNodePar->GetLabel() << ", ID = " <<
-    // pCurNodePar->GetID()  << ", num of children " <<
-    // pCurNodePar->GetChildrenNum()  << endl; for( int pp=0; pp<
-    // pCurNodePar->GetChildrenNum(); ++pp )
-    //{
-    // cout << "** Child: " << pCurNodePar->GetChild(pp)->GetID() << endl;
-    //}
-    vector<int> lblsNew;
-    pCurNode->GetEdgeLabelsToChild(pCurNodePar, lblsNew);
-    TreeNode *pNodeNext = pCurNode->GetParent();
-    pCurNode->RemoveChild(pCurNodePar);
-    // pCurNode->SetParent(pCurNodePar);
-    pCurNodePar->AddChild(pCurNode, lblsNew);
+void PhylogenyTreeBasic ::RemoveEdgeLabelsToLeaves()
+{
+    // get all leaves
+    vector<TreeNode *> vecLeaves;
+    GetAllLeafNodes(vecLeaves);
+    for (int i = 0; i < (int)vecLeaves.size(); ++i)
+    {
+        vecLeaves[i]->RemoveLabelsPar();
+    }
+}
 
-#if 0
-		vector<TreeNode *> listParChildren;
-		for(int c=0; c<(int)pCurNode->GetChildrenNum(); ++c  )
-		{
-			//if( pCurNode->GetChild(c) != pCurNode )
-			//{
-			listParChildren.push_back( pCurNode->GetChild(c) ) ;
-			//}
-		}
-		for(int c=0; c<(int)listParChildren.size(); ++c  )
-		{
-			//if( pCurNode->GetChild(c) != pCurNode )
-			//{
-			pCurNode->RemoveChild( listParChildren[c] ) ;
-			//}
-		}
-		// add these to the descendent of the new par
-		for( int c=0; c<(int)listParChildren.size(); ++c )
-		{
-			vector<int> emptyLbls;
-			pCurNodePar->AddChild(listParChildren[c], emptyLbls);
-		}
-#endif
+void PhylogenyTreeBasic ::IncEdgeLabelsBy(int offset)
+{
+    // inc edge label of this node (and subtree if needed)
+    this->rootNode->IncEdgeLabelsBy(offset, true);
+}
+
+string PhylogenyTreeBasic ::GetShapeLabelNodeBrNum(map<TreeNode *, pair<int, int>> &mapNodeNumBrannches, vector<int> &listOrderedLeaves)
+{
+    // format: <num of underlying branches, event id>, negative for internal nodes
+    map<TreeNode *, pair<int, int>> mapNodeNumBrannchesUse = mapNodeNumBrannches;
+    // given: num of branches at each node,
+    // return shape label as empty Newick format
+    // for this, first need to find out all nodes that all descendents have appeared in the tree
+    set<TreeNode *> setAncesNotGiven;
+    for (map<TreeNode *, pair<int, int>>::iterator it = mapNodeNumBrannches.begin(); it != mapNodeNumBrannches.end(); ++it)
+    {
+        set<TreeNode *> setAllAnces;
+        it->first->GetAllAncestors(setAllAnces);
+        for (set<TreeNode *>::iterator itg = setAllAnces.begin(); itg != setAllAnces.end(); ++itg)
+        {
+            if (mapNodeNumBrannches.find(*itg) == mapNodeNumBrannches.end())
+            {
+                //
+                pair<int, int> pp(-1, -1);
+                mapNodeNumBrannchesUse.insert(map<TreeNode *, pair<int, int>>::value_type(*itg, pp));
+            }
+        }
+    }
+    // now call the root to find the label
+    return this->rootNode->GetShapeLabelNodeBrNum(mapNodeNumBrannchesUse, listOrderedLeaves);
+}
 
-    // cout << "AFTER CHANGING...\n";
-    // cout << "pCurNode: label =" << pCurNode->GetLabel() << ", ID = " <<
-    // pCurNode->GetID() << ", num of children " << pCurNode->GetChildrenNum()
-    // << endl; for( int pp=0; pp< pCurNode->GetChildrenNum(); ++pp )
+void PhylogenyTreeBasic ::MakeSubtreeUnrefined(TreeNode *pSubtree)
+{
+    // make this subtree unrefined (i.e. each leaf is attached directly to the subtree root)
+    // CAUTION: all edge labels are LOST!!!!
+    set<TreeNode *> setAllLeavesUnder;
+    pSubtree->GetAllLeavesUnder(setAllLeavesUnder);
+    //cout << "setAllLeavesUnder: ";
+    //for( set<TreeNode *> :: iterator it = setAllLeavesUnder.begin(); it != setAllLeavesUnder.end(); ++it)
     //{
-    // cout << "** Child: " << pCurNode->GetChild(pp)->GetID() << endl;
+    //(*it)->Dump();
     //}
-    // cout << "pCurNodePar: label =" << pCurNodePar->GetLabel() << ", ID = " <<
-    // pCurNodePar->GetID()  << ", num of children " <<
-    // pCurNodePar->GetChildrenNum()  << endl; for( int pp=0; pp<
-    // pCurNodePar->GetChildrenNum(); ++pp )
+    //cout << endl;
+    set<TreeNode *> setAllDescUnder;
+    pSubtree->GetAllDescendents(setAllDescUnder);
+    //cout << "setAllDescUnder: ";
+    //for( set<TreeNode *> :: iterator it = setAllDescUnder.begin(); it != setAllDescUnder.end(); ++it)
     //{
-    // cout << "** Child: " << pCurNodePar->GetChild(pp)->GetID() << endl;
+    //(*it)->Dump();
     //}
+    //cout << endl;
 
-    // find the other descendents of the par
-    if (pNodeNext == NULL) {
-      vector<TreeNode *> listParChildren;
-      for (int c = 0; c < (int)pCurNode->GetChildrenNum(); ++c) {
-        // if( pCurNode->GetChild(c) != pCurNode )
-        //{
-        listParChildren.push_back(pCurNode->GetChild(c));
-        //}
-      }
-      for (int c = 0; c < (int)listParChildren.size(); ++c) {
-        // if( pCurNode->GetChild(c) != pCurNode )
-        //{
-        pCurNode->RemoveChild(listParChildren[c]);
-        //}
-      }
-      // add these to the descendent of the new par
-      for (int c = 0; c < (int)listParChildren.size(); ++c) {
-        vector<int> lblsNew;
-        pCurNode->GetEdgeLabelsToChild(listParChildren[c], lblsNew);
-
-        // vector<int> emptyLbls;
-        pCurNodePar->AddChild(listParChildren[c], lblsNew);
-      }
-      pCurNodePar->RemoveChild(pCurNode);
-
-      // cout << "FINALLY...\n";
-      // cout << "pCurNode: label =" << pCurNode->GetLabel() << ", ID = " <<
-      // pCurNode->GetID() << ", num of children " << pCurNode->GetChildrenNum()
-      // << endl; for( int pp=0; pp< pCurNode->GetChildrenNum(); ++pp )
-      //{
-      // cout << "** Child: " << pCurNode->GetChild(pp)->GetID() << endl;
-      //}
-      // cout << "pCurNodePar: label =" << pCurNodePar->GetLabel() << ", ID = "
-      // << pCurNodePar->GetID()  << ", num of children " <<
-      // pCurNodePar->GetChildrenNum()  << endl; for( int pp=0; pp<
-      // pCurNodePar->GetChildrenNum(); ++pp )
-      //{
-      // cout << "** Child: " << pCurNodePar->GetChild(pp)->GetID() << endl;
-      //}
-      // done. pCurNode is the root, we should by-pass this node and assign
-      // their children to pCurNodePar
-      break;
+    // detach all leaves from their parent
+    for (set<TreeNode *>::iterator it = setAllLeavesUnder.begin(); it != setAllLeavesUnder.end(); ++it)
+    {
+        //
+        TreeNode *ppar = (*it)->GetParent();
+        ppar->RemoveChild(*it);
     }
-    //
-    pCurNodePar = pCurNode;
-    pCurNode = pNodeNext;
-  }
 
-  // finally get rid of the original root
-  delete rootNode;
-  rootNode = pRootNew;
+    pSubtree->RemoveAllChildren();
+
+    // remove all descendent except the leaves
+    for (set<TreeNode *>::iterator it = setAllDescUnder.begin(); it != setAllDescUnder.end(); ++it)
+    {
+        // need to be careful b/c node deletion is recursive
+        if (setAllLeavesUnder.find(*it) == setAllLeavesUnder.end() && (*it) != pSubtree && ((*it)->GetParent() == pSubtree))
+        {
+            //cout << "Delete this node: ";
+            //(*it)->Dump();
+            delete *it;
+        }
+    }
+    // then add the leaves directly under the subtree root
+    for (set<TreeNode *>::iterator it = setAllLeavesUnder.begin(); it != setAllLeavesUnder.end(); ++it)
+    {
+        vector<int> lblEmpty;
+        pSubtree->AddChild(*it, lblEmpty);
+    }
+    //string strTree;
+    //ConsNewick(strTree);
+    //cout << "After MakeSubtreeUnrefiined: tree is " << strTree << endl;
 }
 
-int PhylogenyTreeBasic ::GetNumLeaves() {
-  if (numLeaves > 0) {
-    return numLeaves;
-  }
-  set<int> lvids;
-  GetLeaveIds(lvids);
-  numLeaves = lvids.size();
-  return numLeaves;
+void PhylogenyTreeBasic ::Binarize()
+{
+    // make the tree binary
+    int idToUseNext = this->rootNode->GetMaxIdWithinSubtree() + 1;
+    this->rootNode->Binarize(idToUseNext);
+    //string strTree;
+    //ConsNewick(strTree);
+    //cout << "After binarization: tree is " << strTree << endl;
 }
 
-int PhylogenyTreeBasic ::GetNumInternalNodes() {
-  //
-  vector<TreeNode *> listAllNodes;
-  GetAllNodes(listAllNodes);
-  int res = 0;
-  for (int i = 0; i < (int)listAllNodes.size(); ++i) {
-    if (listAllNodes[i]->IsLeaf() == false) {
-      //
-      ++res;
+void PhylogenyTreeBasic ::CreatePhyTreeFromLeavesWithLabels(const set<string> &setLeafLabels, PhylogenyTreeBasic &treeSubsetLeaves, bool fUseOldTaxonName)
+{
+    // given a set of leaf labels, construct another phylogenetic tree that is extracted
+    // from the current tree by only taking those leaves with one of the given labels
+    // YW: caution: all taxa names are mapped to 0,1,2,... according to their order in list if fUseOldTaxonName=false
+    // otherwise, keep the original taxon names
+    set<int> setSubsetLeaves;
+    map<int, string> mapOrigIdToOrigStrLbl;
+    int idToUseFirst = 0;
+    for (set<string>::const_iterator it = setLeafLabels.begin(); it != setLeafLabels.end(); ++it)
+    {
+        string lblcur = *it;
+        set<int> setSubsetLeavesStep;
+        GetLeavesIdsWithLabel(lblcur, setSubsetLeavesStep);
+        //cout << "CreatePhyTreeFromLeavesWithLabels: lblcur: " << lblcur <<" setSubsetLeavesStep: ";
+        //DumpIntSet(setSubsetLeavesStep);
+        UnionSets(setSubsetLeaves, setSubsetLeavesStep);
+
+        string lblToUse = lblcur;
+        if (fUseOldTaxonName == false)
+        {
+            char buf[100];
+            sprintf(buf, "%d", idToUseFirst++);
+            lblToUse = buf;
+        }
+        for (set<int>::iterator it2 = setSubsetLeavesStep.begin(); it2 != setSubsetLeavesStep.end(); ++it2)
+        {
+            //mapOrigIdToOrigStrLbl.insert( map<int,string> :: value_type(*it2, lblcur) );
+            mapOrigIdToOrigStrLbl.insert(map<int, string>::value_type(*it2, lblToUse));
+            //cout << "mapOrigIdToOrigStrLbl: " << *it2 << ", lblToUse: " << lblToUse << endl;
+        }
     }
-  }
-  return res;
-}
 
-void PhylogenyTreeBasic ::GetAllLeafNodes(
-    vector<TreeNode *> &listLeafNodes) const {
-  listLeafNodes.clear();
+    // get all clades first
+    set<set<int>> setClades;
+    GetAllCladesById(setClades);
+    //cout << "All clades: \n";
+    //for(set<set<int> > :: iterator it = setClades.begin(); it != setClades.end(); ++it)
+    //{
+    //DumpIntSet(*it);
+    //}
+
+    // map the remaining id to 0,1,2....
+    map<int, int> mapIdToContinue;
+    map<int, string> mapContIdToOrigStr;
+    int idToUse = 0;
+    for (set<int>::iterator it = setSubsetLeaves.begin(); it != setSubsetLeaves.end(); ++it)
+    {
+        YW_ASSERT_INFO(mapOrigIdToOrigStrLbl.find(*it) != mapOrigIdToOrigStrLbl.end(), "Fail");
+        mapContIdToOrigStr.insert(map<int, string>::value_type(idToUse, mapOrigIdToOrigStrLbl[*it]));
+        //cout << "mapContIdToOrigStr: idtouse: " << idToUse << ", string orig: " << mapOrigIdToOrigStrLbl[*it] << endl;
+        mapIdToContinue.insert(map<int, int>::value_type(*it, idToUse++));
+    }
+
+    set<set<int>> setCladesSub;
+    // now extract those with only those given
+    for (set<set<int>>::iterator it = setClades.begin(); it != setClades.end(); ++it)
+    {
+        set<int> sintstep;
+        JoinSets(*it, setSubsetLeaves, sintstep);
+        if (sintstep.size() > 0)
+        {
+            // convert to continuous ids first
+            set<int> sintstep2;
+            MapIntSetTo(sintstep, mapIdToContinue, sintstep2);
+
+            setCladesSub.insert(sintstep2);
 
-  PhylogenyTreeBasic &refSelf = const_cast<PhylogenyTreeBasic &>(*this);
-  PhylogenyTreeIterator itorTree(refSelf);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
+            //cout << "Adding a clade: ";
+            //DumpIntSet( sintstep2);
+            //cout << "for orig clade: ";
+            //DumpIntSet(sintstep);
+        }
     }
-    if (pn->IsLeaf() == true) {
-      listLeafNodes.push_back(pn);
+
+    // now build a tree with these labels
+    CreatePhyTreeWithRootedSplits(treeSubsetLeaves, setSubsetLeaves.size(), setCladesSub);
+
+    // now map the leaves of the new tree to the original ids
+    treeSubsetLeaves.AssignLeafLabels(mapContIdToOrigStr);
+
+    //cout << "This is the phylogenetic tree constructed from subset of leaves: "
+    //this->OutputGML("tree1.gml");
+    //treeSubsetLeaves.OutputGML("t1.gml");
+    //exit(1);
+}
+
+void PhylogenyTreeBasic ::AssignLeafLabels(const map<int, string> &mapLeafLbls)
+{
+    // assign labels stored in the map (format: node id to lbl)
+    vector<TreeNode *> listLeafNodes;
+    GetAllLeafNodes(listLeafNodes);
+    for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+    {
+        int idn = listLeafNodes[i]->GetID();
+        map<int, string>::const_iterator itg = mapLeafLbls.find(idn);
+        YW_ASSERT_INFO(itg != mapLeafLbls.end(), "Fail");
+        string strLblNew = itg->second;
+        listLeafNodes[i]->SetLabel(strLblNew);
+        listLeafNodes[i]->SetUserLabel(strLblNew);
     }
-  }
 }
+void PhylogenyTreeBasic ::ReassignLeafLabels(const map<string, string> &mapLeafLbls)
+{
+    vector<TreeNode *> listLeafNodes;
+    GetAllLeafNodes(listLeafNodes);
+    for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+    {
+        string str = listLeafNodes[i]->GetLabel();
+        //cout << "leaf label curr: " << str << endl;
+        map<string, string>::const_iterator itg = mapLeafLbls.find(str);
 
-void PhylogenyTreeBasic ::GetAllNodes(vector<TreeNode *> &listLeafNodes) const {
-  listLeafNodes.clear();
+        if (itg == mapLeafLbls.end())
+        {
+            // TBD. YW: for now. Need to look at later...
+            continue;
+        }
 
-  PhylogenyTreeBasic &refSelf = const_cast<PhylogenyTreeBasic &>(*this);
-  PhylogenyTreeIterator itorTree(refSelf);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
+        YW_ASSERT_INFO(itg != mapLeafLbls.end(), "Fail");
+        string strLblNew = itg->second;
+        listLeafNodes[i]->SetLabel(strLblNew);
+        listLeafNodes[i]->SetUserLabel(strLblNew);
     }
-    listLeafNodes.push_back(pn);
-  }
 }
 
-// remove all leaf nodes without taxa ids
-void PhylogenyTreeBasic ::CleanNonLabeledLeaves() {
-  // cout << "CleanNonLabeledLeaves:\n";
-  // mark all nodes that are on the path from a labeled leaf node to root
-  set<TreeNode *> setNodesNonredundent;
-
-  vector<TreeNode *> listLeafNodes;
-  GetAllLeafNodes(listLeafNodes);
-  for (int ii = 0; ii < (int)listLeafNodes.size(); ++ii) {
-    // cout << "Leaflabel: " << listLeafNodes[ii]->GetLabel() << endl;
-    if (listLeafNodes[ii]->GetLabel().empty() == true ||
-        listLeafNodes[ii]->GetLabel() == "-") {
-      //
-      // cout << "This leaf is REDUNDENT\n";
-      continue;
-    }
-
-    TreeNode *pncurr = listLeafNodes[ii];
-    while (pncurr != NULL &&
-           setNodesNonredundent.find(pncurr) == setNodesNonredundent.end()) {
-
-      //
-      setNodesNonredundent.insert(pncurr);
-
-      //
-      pncurr = pncurr->GetParent();
-    }
-  }
-
-  // now clean it by removing each node that does not appear in that
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  vector<TreeNode *> listNodesToClean;
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    // cout << "node id = " << pn->GetID() << endl;
+void PhylogenyTreeBasic ::SetUserLabelToCurrLabels()
+{
+    vector<TreeNode *> listLeafNodes;
+    GetAllLeafNodes(listLeafNodes);
+    for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+    {
+        listLeafNodes[i]->SetUserLabel(listLeafNodes[i]->GetLabel());
+    }
+}
 
-    //
-    if (setNodesNonredundent.find(pn) == setNodesNonredundent.end()) {
-      // remove it
-      listNodesToClean.push_back(pn);
-    }
-  }
-  // now clean
-  for (int ii = 0; ii < (int)listNodesToClean.size(); ++ii) {
-    // cout << "Remove one node\n";
-    RemoveNode(listNodesToClean[ii]);
-  }
-}
-
-void PhylogenyTreeBasic ::RemoveNode(TreeNode *pn) {
-  // remove the node (but does not do anything to its descendent if it has; that
-  // is, we assume the node has no children)
-  YW_ASSERT_INFO(pn->IsLeaf() == true, "Wrong: it still have children");
-  TreeNode *pnpar = pn->GetParent();
-  if (pnpar != NULL) {
-    pnpar->RemoveChild(pn);
-  }
-  delete pn;
-}
-
-void PhylogenyTreeBasic ::RemoveNodeKeepChildren(TreeNode *pn) {
-  YW_ASSERT_INFO(pn != NULL, "null");
-  // cout << "RemoveNodeKeepChildren: pn: ";
-  // pn->Dump();
-
-  // remove node (and move all its children to be the nodes of the grand par
-  // YW: cannot remove the root this way
-  YW_ASSERT_INFO(pn != GetRoot(), "Cannot remove root this way");
-  TreeNode *pnpar = pn->GetParent();
-  YW_ASSERT_INFO(pnpar != NULL, "Wrong3");
-  pnpar->RemoveChild(pn);
-
-  for (int i = 0; i < pn->GetChildrenNum(); ++i) {
-    vector<int> emptyLbls;
-    pnpar->AddChild(pn->GetChild(i), emptyLbls);
-  }
-  pn->DetachAllChildren();
-  delete pn;
-
-  // remove newly created degree one node
-  RemoveDegreeOneNodeAt(pnpar);
-}
-void PhylogenyTreeBasic ::RemoveDegreeOneNodeAt(TreeNode *pn) {
-  // return;
-  // cout << "removing degree one node: ";
-  // pn->Dump();
-  // cout << "Current tree: ";
-  // this->Dump();
-  // exit(1);
-  // remove this node if it is a degree-1 node
-  int numChildren = pn->GetChildrenNum();
-  YW_ASSERT_INFO(numChildren >= 1, "Num of children: at least 1");
-  if (numChildren == 1) {
-    // if root, then delete it and re-set the root
-    if (pn == GetRoot()) {
-      // cout << "The degree one node is root!\n";
-      TreeNode *pnchild = pn->GetChild(0);
-      YW_ASSERT_INFO(pnchild != NULL, "pnchild: null");
-      // cout << "pnchild: ";
-      // pnchild->Dump();
-      pnchild->DetachSelf();
-      // cout << "After detach: root: ";
-      // pn->Dump();
-      // pn->DetachAllChildren();
-      pnchild->SetParent(NULL);
-      delete pn;
-      SetRootPlain(pnchild);
-    } else {
-      // then invoke the removekeepchild
-      RemoveNodeKeepChildren(pn);
-    }
-  }
-  // cout << "Done: RemoveDegreeOneNodeAt. Tree is now: ";
-  // this->Dump();
-}
-
-void PhylogenyTreeBasic ::RemoveDegreeOneNodes() {
-  //
-  vector<TreeNode *> listNodesAll;
-  this->GetAllNodes(listNodesAll);
-  for (int i = 0; i < (int)listNodesAll.size(); ++i) {
-    if (listNodesAll[i]->IsLeaf() == false) {
-      RemoveDegreeOneNodeAt(listNodesAll[i]);
-    }
-  }
-}
-
-void PhylogenyTreeBasic ::RemoveDescendentsFrom(set<TreeNode *> &setTreeNodes) {
-  // only keep those whose ancestor is ot in the set given
-  set<TreeNode *> setTreeNodeNew;
-  for (set<TreeNode *>::iterator it = setTreeNodes.begin();
-       it != setTreeNodes.end(); ++it) {
-    // check whether any of its parent is in the list
-    bool fKeep = true;
-    TreeNode *ppar = (*it)->GetParent();
-    while (ppar != NULL) {
-      if (setTreeNodes.find(ppar) != setTreeNodes.end()) {
-        fKeep = false;
-        break;
-      }
-      ppar = ppar->GetParent();
-    }
-    if (fKeep == true) {
-      setTreeNodeNew.insert(*it);
-    }
-  }
-  setTreeNodes = setTreeNodeNew;
-}
-
-// given a set of clusters (subsets of tree taxa), construct the corresponding
-// phylo trees YW: need to allow mulfurcating trees
-void PhylogenyTreeBasic ::ConsPhyTreeFromClusters(
-    const set<set<int> > &setClusters) {
-  // cout << "ConsPhyTreeFromClusters :: Cluseters: \n";
-  // for( set< set<int> > :: const_iterator it = setClusters.begin(); it !=
-  // setClusters.end(); ++it )
-  //{
-  // DumpIntSet( *it );
-  //}
-  // assume all leaves are given as singleton taxon. So first collect those
-  // singleton subsets
-  set<set<int> > setSubsetsActive;
-  TreeNode *nodeLast = NULL;
-  map<set<int>, TreeNode *> mapClusterToNode;
-  for (set<set<int> >::const_iterator it = setClusters.begin();
-       it != setClusters.end(); ++it) {
-    if (it->size() == 1) {
-      // add in setClusters
-      setSubsetsActive.insert(*it);
-      // also create nodes
-      TreeNode *pnode = new TreeNode(*(it->begin()));
-      char buf[100];
-      sprintf(buf, "%d", *(it->begin()));
-      string sbuf = buf;
-      pnode->SetLabel(sbuf);
-      nodeLast = pnode;
-      mapClusterToNode.insert(
-          map<set<int>, TreeNode *>::value_type(*it, pnode));
-    }
-  }
-  // setup num of leaves now
-  this->numLeaves = mapClusterToNode.size();
-
-  // need to allow mulfurcating trees
-  // approach: for each cluster, maintain a pointer that points to the cluster
-  // that is its parent then, each time, loop through to find all parents
-  map<set<int>, set<int> > mapClustrToPar;
-  // try to see whether we can create new nodes
-  for (set<set<int> >::iterator it1 = setClusters.begin();
-       it1 != setClusters.end(); ++it1) {
-    set<set<int> >::iterator it2 = setClusters.begin();
-    ++it2;
-    for (; it2 != setClusters.end(); ++it2) {
-      //
-      set<int> sLarger = *it1;
-      set<int> sSmaller = *it2;
-      if (sLarger.size() < sSmaller.size()) {
-        sLarger = *it2;
-        sSmaller = *it1;
-      }
-      // can these two coalesce into a single cluster known
-      if (sLarger.size() > sSmaller.size() &&
-          IsSetContainer(sLarger, sSmaller) == true) {
-        if (mapClustrToPar.find(sSmaller) == mapClustrToPar.end() ||
-            mapClustrToPar[sSmaller].size() > sLarger.size()) {
-          mapClustrToPar.erase(sSmaller);
-          mapClustrToPar.insert(
-              map<set<int>, set<int> >::value_type(sSmaller, sLarger));
-        }
-      }
-    }
-  }
-
-  // loop until there is only a single subset
-  while (setSubsetsActive.size() > 1) {
-    set<set<int> > setSubsetsActiveNext = setSubsetsActive;
-    // cout << "Current active sets: \n";
-    // for( set< set<int> > :: const_iterator it = setSubsetsActiveNext.begin();
-    // it != setSubsetsActiveNext.end(); ++it )
-    //{
-    // DumpIntSet( *it );
-    //}
-    // try to find several clusters that have the same parent cluster
-    // try to see whether we can create new nodes
-    map<set<int>, set<set<int> > > mapClusterCoal;
-    for (set<set<int> >::iterator it1 = setSubsetsActive.begin();
-         it1 != setSubsetsActive.end(); ++it1) {
-      // get parent
-      YW_ASSERT_INFO(mapClustrToPar.find(*it1) != mapClustrToPar.end(),
-                     "Cluster: not found");
-      if (mapClusterCoal.find(mapClustrToPar[*it1]) == mapClusterCoal.end()) {
-        set<set<int> > sempty;
-        mapClusterCoal.insert(map<set<int>, set<set<int> > >::value_type(
-            mapClustrToPar[*it1], sempty));
-      }
-      // cout << "Having child cluster: ";
-      // DumpIntSet( mapClustrToPar[*it1] );
-      // cout << ", for child ";
-      // DumpIntSet(*it1);
-      mapClusterCoal[mapClustrToPar[*it1]].insert(*it1);
-    }
-
-    // now process each record
-    for (map<set<int>, set<set<int> > >::iterator it2 = mapClusterCoal.begin();
-         it2 != mapClusterCoal.end(); ++it2) {
-      // YW_ASSERT_INFO( it2->second.size() > 1, "Must have at least two
-      // coalescing" );
-      // cout << "Set parent: ";
-      // DumpIntSet(it2->first);
-      set<int> sunion;
-      for (set<set<int> >::iterator it3 = it2->second.begin();
-           it3 != it2->second.end(); ++it3) {
-        // cout << "Set child: ";
-        // DumpIntSet(*it3);
-        // can these two coalesce into a single cluster known
-        UnionSets(sunion, *it3);
-      }
-      // cout << "sunion = ";
-      // DumpIntSet( sunion );
-      // ensure these do coal into some meaningful cluster
-      if (setClusters.find(sunion) == setClusters.end()) {
-        // cout << "This set not complete\n";
-        // this cluster not done yet
-        continue;
-      }
-
-      // create this new node
-      TreeNode *pnode = new TreeNode;
-      nodeLast = pnode;
-      for (set<set<int> >::iterator it3 = it2->second.begin();
-           it3 != it2->second.end(); ++it3) {
-        // cout << "Processing first subset: ";
-        // DumpIntSet( *it1 );
-        // cout << "Processing second subset: ";
-        // DumpIntSet( *it2 );
-        // these two add up to an input cluster and so create a new node for it
-        YW_ASSERT_INFO(mapClusterToNode.find(*it3) != mapClusterToNode.end(),
-                       "Fail1");
-        vector<int> emptyLabels;
-        pnode->AddChild(mapClusterToNode[*it3], emptyLabels);
-        setSubsetsActiveNext.erase(*it3);
-      }
-      mapClusterToNode.insert(
-          map<set<int>, TreeNode *>::value_type(sunion, pnode));
-      setSubsetsActiveNext.insert(sunion);
-      // cout << "Creating node: " << endl;
-    }
-    // must make progress
-    YW_ASSERT_INFO(setSubsetsActive != setSubsetsActiveNext,
-                   "Did not make progress");
-    setSubsetsActive = setSubsetsActiveNext;
-  }
-  YW_ASSERT_INFO(nodeLast != NULL, "nodeLast: NULL");
-  SetRoot(nodeLast);
+void PhylogenyTreeBasic ::SetLabelsToCurrUserLabels()
+{
+    vector<TreeNode *> listLeafNodes;
+    GetAllLeafNodes(listLeafNodes);
+    for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+    {
+        listLeafNodes[i]->SetLabel(listLeafNodes[i]->GetUserLabel());
+    }
 }
 
-// find the set of clades in the subtree specified by the given leaf nodes
-void PhylogenyTreeBasic ::FindCladeOfSubsetLeaves(
-    const set<TreeNode *> &setLeaves, set<set<TreeNode *> > &setSubtreeClades) {
-  // caution: do not check whether these are true leaves
-  TreeNode *pRoot = this->GetRoot();
-  set<TreeNode *> setAllNodes;
-  pRoot->GetAllDescendents(setAllNodes);
-
-  //
-  for (set<TreeNode *>::iterator it = setAllNodes.begin();
-       it != setAllNodes.end(); ++it) {
-    //
-    set<TreeNode *> setLeavesUnder;
-    (*it)->GetAllLeavesUnder(setLeavesUnder);
-    set<TreeNode *> setLeavesSS;
-    JoinSetsGen(setLeavesUnder, setLeaves, setLeavesSS);
-    if (setLeavesSS.size() > 0) {
-      setSubtreeClades.insert(setLeavesSS);
+int PhylogenyTreeBasic ::GetMaxDegree() const
+{
+    int res = 0;
+
+    PhylogenyTreeBasic &thisTree = const_cast<PhylogenyTreeBasic &>(*this);
+    PhylogenyTreeIterator itor(thisTree);
+    itor.Init();
+    while (itor.IsDone() == false)
+    {
+        TreeNode *pn = itor.GetCurrNode();
+
+        int degThis = pn->GetChildrenNum();
+        if (degThis > res)
+        {
+            res = degThis;
+        }
+
+        itor.Next();
     }
-  }
+    return res;
 }
 
-// find the set of clades in the subtree specified by the given leaf nodes
-void PhylogenyTreeBasic ::FindCladeOfSubsetLeavesExact(
-    const set<TreeNode *> &setLeaves, set<set<TreeNode *> > &setSubtreeClades) {
-  // caution: do not check whether these are true leaves
-  TreeNode *pRoot = this->GetRoot();
-  set<TreeNode *> setAllNodes;
-  pRoot->GetAllDescendents(setAllNodes);
-
-  //
-  for (set<TreeNode *>::iterator it = setAllNodes.begin();
-       it != setAllNodes.end(); ++it) {
-    //
-    set<TreeNode *> setLeavesUnder;
-    (*it)->GetAllLeavesUnder(setLeavesUnder);
-    set<TreeNode *> setLeavesSS;
-    JoinSetsGen(setLeavesUnder, setLeaves, setLeavesSS);
-    if (setLeavesSS == setLeavesUnder) {
-      setSubtreeClades.insert(setLeavesSS);
-    }
-  }
-}
-
-void PhylogenyTreeBasic ::GroupLeavesToSubtrees(
-    const set<TreeNode *> &setLeaves,
-    const set<set<TreeNode *> > &cladeNodesToProc,
-    set<set<TreeNode *> > &setSubtreeClades) {
-  // group the leaves into subtrees (i.e. the subtrees contains exactly those
-  // appear in the leaves YW: note this is not the most realistic way (say you
-  // have one noisy leaf sepearting two otherwise fully connected catepillar
-  // tree, then the result willl be a lot more trees to use). But this servers
-  // as a starting point YW: here, we are given some subset out of some
-  // pre-specified leaf set, and some subsets (clades) over these leaves; we
-  // want to find the set of maximal clades containing partition these leaves
-  // TreeNode *pRoot = this->GetRoot();
-  // set<TreeNode *> setAllNodes;
-  // pRoot->GetAllDescendents(setAllNodes);
-
-  // order based on the size
-  map<int, set<set<TreeNode *> > > mapSubtreeSz;
-  // for( set<TreeNode *> :: iterator it = setAllNodes.begin(); it !=
-  // setAllNodes.end(); ++it)
-  for (set<set<TreeNode *> >::const_iterator it = cladeNodesToProc.begin();
-       it != cladeNodesToProc.end(); ++it) {
-    //
-    // set<TreeNode *> setLeavesUnder;
-    //(*it)->GetAllLeavesUnder( setLeavesUnder );
-    if (mapSubtreeSz.find(it->size()) == mapSubtreeSz.end()) {
-      set<set<TreeNode *> > ss;
-      mapSubtreeSz.insert(
-          map<int, set<set<TreeNode *> > >::value_type(it->size(), ss));
-    }
-    mapSubtreeSz[it->size()].insert(*it);
-  }
-
-  // reverse order
-  set<TreeNode *> setNodesProc = setLeaves;
-  for (map<int, set<set<TreeNode *> > >::reverse_iterator rit =
-           mapSubtreeSz.rbegin();
-       rit != mapSubtreeSz.rend(); ++rit) {
-    //
-    for (set<set<TreeNode *> >::iterator itg = rit->second.begin();
-         itg != rit->second.end(); ++itg) {
-      //
-      set<TreeNode *> setLeavesSS;
-      JoinSetsGen(*itg, setNodesProc, setLeavesSS);
-      if (setLeavesSS.size() == itg->size()) {
-        // find a good match here, use it
-        setSubtreeClades.insert(*itg);
-        SubtractSetsGen(setNodesProc, *itg);
-      }
-    }
-    if (setNodesProc.size() == 0) {
-      break;
-    }
-  }
-  YW_ASSERT_INFO(setNodesProc.size() == 0, "Fail to classify all subtrees");
-}
-
-void PhylogenyTreeBasic ::GroupLeavesToSubtreesSamePar(
-    const set<TreeNode *> &setLeaves,
-    const set<set<TreeNode *> > &cladeNodesToProc,
-    set<set<TreeNode *> > &setSubtreeClades) {
-  // group leaves that form subtrees w/ same parents. Difference from above: for
-  // two subtrees that share the same parent but could be other branches, put
-  // the together
-  GroupLeavesToSubtrees(setLeaves, cladeNodesToProc, setSubtreeClades);
-  // now see whether we can combine subtrees s.t. the combined one is still
-  // contined in some parent
-  map<set<TreeNode *>, set<TreeNode *> > mapSubtreesToPar;
-  for (set<set<TreeNode *> >::iterator it = setSubtreeClades.begin();
-       it != setSubtreeClades.end(); ++it) {
-    for (set<set<TreeNode *> >::iterator itg = cladeNodesToProc.begin();
-         itg != cladeNodesToProc.end(); ++itg) {
-      //
-      if (*itg != *it && itg->size() > it->size() &&
-          (mapSubtreesToPar.find(*it) == mapSubtreesToPar.end() ||
-           mapSubtreesToPar[*it].size() > itg->size())) {
-        //
-        set<TreeNode *> sint;
-        JoinSetsGen(*itg, *it, sint);
-        if (sint.size() == it->size()) {
-          //
-          if (mapSubtreesToPar.find(*it) == mapSubtreesToPar.end()) {
-            mapSubtreesToPar.insert(
-                map<set<TreeNode *>, set<TreeNode *> >::value_type(*it, *itg));
-          } else {
-            mapSubtreesToPar[*it] = *itg;
-          }
-        }
-      }
-    }
-  }
-  map<set<TreeNode *>, set<TreeNode *> > mapRevParToSubtrees;
-  for (map<set<TreeNode *>, set<TreeNode *> >::iterator it =
-           mapSubtreesToPar.begin();
-       it != mapSubtreesToPar.end(); ++it) {
-    //
-    if (mapRevParToSubtrees.find(it->second) == mapRevParToSubtrees.end()) {
-      mapRevParToSubtrees.insert(
-          map<set<TreeNode *>, set<TreeNode *> >::value_type(it->second,
-                                                             it->first));
-    } else {
-      UnionSetsGen(mapRevParToSubtrees[it->second], it->first);
-    }
-  }
-  setSubtreeClades.clear();
-  for (map<set<TreeNode *>, set<TreeNode *> >::iterator it =
-           mapRevParToSubtrees.begin();
-       it != mapRevParToSubtrees.end(); ++it) {
-    setSubtreeClades.insert(it->second);
-  }
-}
-
-void PhylogenyTreeBasic ::GetAllClades(set<set<int> > &setClades) {
-  //
-  setClades.clear();
-  // now clean it by removing each node that does not appear in that
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    // cout << "node id = " << pn->GetID() << endl;
-    set<TreeNode *> setDescendents;
-    pn->GetAllLeavesUnder(setDescendents);
-    set<int> sint;
-    for (set<TreeNode *>::iterator itg = setDescendents.begin();
-         itg != setDescendents.end(); ++itg) {
-      sint.insert((*itg)->GetIntLabel());
-    }
-    setClades.insert(sint);
-  }
-}
-
-void PhylogenyTreeBasic ::GetAllCladesList(vector<set<int> > &listClades) {
-  listClades.clear();
-  // now clean it by removing each node that does not appear in that
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    // cout << "node id = " << pn->GetID() << endl;
-    set<TreeNode *> setDescendents;
-    pn->GetAllLeavesUnder(setDescendents);
-    set<int> sint;
-    for (set<TreeNode *>::iterator itg = setDescendents.begin();
-         itg != setDescendents.end(); ++itg) {
-      sint.insert((*itg)->GetIntLabel());
-    }
-    listClades.push_back(sint);
-  }
-}
-
-// different from the above, (1) we allow duplicate int-labels (and thus
-// multiset) (2) group clades by common parents
-void PhylogenyTreeBasic ::GetAllCladeGroupsIntLabel(
-    multiset<multiset<multiset<int> > > &setCladeGroupsDupLabels,
-    multiset<int> &rootClade) {
-  // group all clades by parent nodes (i.e. clades with same parent are in one
-  // class) root clade: the one with all leaves
-  map<TreeNode *, multiset<multiset<int> > > mapCladeGroupsForNode;
-
-  //
-  setCladeGroupsDupLabels.clear();
-  rootClade.clear();
-  // now clean it by removing each node that does not appear in that
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    // cout << "node id = " << pn->GetID() << endl;
-    set<TreeNode *> setDescendents;
-    pn->GetAllLeavesUnder(setDescendents);
-    multiset<int> sint;
-    for (set<TreeNode *>::iterator itg = setDescendents.begin();
-         itg != setDescendents.end(); ++itg) {
-      sint.insert((*itg)->GetIntLabel());
-    }
-    TreeNode *pnPar = pn->GetParent();
-    if (pnPar == NULL) {
-      // this is the root clade
-      rootClade = sint;
-    } else {
-      if (mapCladeGroupsForNode.find(pnPar) == mapCladeGroupsForNode.end()) {
-        multiset<multiset<int> > mms;
-        mapCladeGroupsForNode.insert(
-            map<TreeNode *, multiset<multiset<int> > >::value_type(pnPar, mms));
-      }
-      mapCladeGroupsForNode[pnPar].insert(sint);
-    }
-  }
-  YW_ASSERT_INFO(rootClade.size() > 0, "Fail to collect root clade");
-  for (map<TreeNode *, multiset<multiset<int> > >::iterator it =
-           mapCladeGroupsForNode.begin();
-       it != mapCladeGroupsForNode.end(); ++it) {
-    //
-    setCladeGroupsDupLabels.insert(it->second);
-  }
-}
-
-void PhylogenyTreeBasic ::GetAllCladesById(set<set<int> > &setClades) {
-  //
-  setClades.clear();
-  // now clean it by removing each node that does not appear in that
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    // cout << "node id = " << pn->GetID() << endl;
-    set<TreeNode *> setDescendents;
-    pn->GetAllLeavesUnder(setDescendents);
-    set<int> sint;
-    for (set<TreeNode *>::iterator itg = setDescendents.begin();
-         itg != setDescendents.end(); ++itg) {
-      sint.insert((*itg)->GetID());
-    }
-    setClades.insert(sint);
-  }
-}
-
-void PhylogenyTreeBasic ::GetAllCladeNodess(set<set<TreeNode *> > &setClades) {
-  //
-  setClades.clear();
-  // now clean it by removing each node that does not appear in that
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    // cout << "node id = " << pn->GetID() << endl;
-    set<TreeNode *> setDescendents;
-    pn->GetAllLeavesUnder(setDescendents);
-
-    setClades.insert(setDescendents);
-  }
-}
-
-TreeNode *PhylogenyTreeBasic ::GetSubtreeRootForLeaves(
-    const set<TreeNode *> &setLvNodes) {
-  PhylogenyTreeIterator itorTree(*this);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    itorTree.Next();
-    if (pn == NULL) {
-      break; // done with all nodes
-    }
-    // cout << "node id = " << pn->GetID() << endl;
-    set<TreeNode *> setDescendents;
-    pn->GetAllLeavesUnder(setDescendents);
-
-    if (setLvNodes == setDescendents) {
-      return pn;
-    }
-  }
-  return NULL;
-}
-
-void PhylogenyTreeBasic ::GroupNodesWithCommonPars(
-    const set<TreeNode *> &setNodes,
-    map<TreeNode *, set<TreeNode *> > &mapNodesWithSamePar) {
-  //
-  mapNodesWithSamePar.clear();
-  for (set<TreeNode *>::const_iterator it = setNodes.begin();
-       it != setNodes.end(); ++it) {
-    //
-    TreeNode *ppar = (*it)->GetParent();
-    if (mapNodesWithSamePar.find(ppar) == mapNodesWithSamePar.end()) {
-      set<TreeNode *> ss;
-      mapNodesWithSamePar.insert(
-          map<TreeNode *, set<TreeNode *> >::value_type(ppar, ss));
-    }
-    mapNodesWithSamePar[ppar].insert(*it);
-  }
-}
-
-void PhylogenyTreeBasic ::RemoveEdgeLabels() {
-  //
-  this->rootNode->RemoveLabels();
-}
-
-void PhylogenyTreeBasic ::RemoveEdgeLabelsToLeaves() {
-  // get all leaves
-  vector<TreeNode *> vecLeaves;
-  GetAllLeafNodes(vecLeaves);
-  for (int i = 0; i < (int)vecLeaves.size(); ++i) {
-    vecLeaves[i]->RemoveLabelsPar();
-  }
-}
-
-void PhylogenyTreeBasic ::IncEdgeLabelsBy(int offset) {
-  // inc edge label of this node (and subtree if needed)
-  this->rootNode->IncEdgeLabelsBy(offset, true);
-}
-
-string PhylogenyTreeBasic ::GetShapeLabelNodeBrNum(
-    map<TreeNode *, pair<int, int> > &mapNodeNumBrannches,
-    vector<int> &listOrderedLeaves) {
-  // format: <num of underlying branches, event id>, negative for internal nodes
-  map<TreeNode *, pair<int, int> > mapNodeNumBrannchesUse = mapNodeNumBrannches;
-  // given: num of branches at each node,
-  // return shape label as empty Newick format
-  // for this, first need to find out all nodes that all descendents have
-  // appeared in the tree
-  set<TreeNode *> setAncesNotGiven;
-  for (map<TreeNode *, pair<int, int> >::iterator it =
-           mapNodeNumBrannches.begin();
-       it != mapNodeNumBrannches.end(); ++it) {
-    set<TreeNode *> setAllAnces;
-    it->first->GetAllAncestors(setAllAnces);
-    for (set<TreeNode *>::iterator itg = setAllAnces.begin();
-         itg != setAllAnces.end(); ++itg) {
-      if (mapNodeNumBrannches.find(*itg) == mapNodeNumBrannches.end()) {
-        //
-        pair<int, int> pp(-1, -1);
-        mapNodeNumBrannchesUse.insert(
-            map<TreeNode *, pair<int, int> >::value_type(*itg, pp));
-      }
-    }
-  }
-  // now call the root to find the label
-  return this->rootNode->GetShapeLabelNodeBrNum(mapNodeNumBrannchesUse,
-                                                listOrderedLeaves);
-}
-
-void PhylogenyTreeBasic ::MakeSubtreeUnrefined(TreeNode *pSubtree) {
-  // make this subtree unrefined (i.e. each leaf points to the root
-  // CAUTION: all edge labels are LOST!!!!
-  set<TreeNode *> setAllLeavesUnder;
-  pSubtree->GetAllLeavesUnder(setAllLeavesUnder);
-  // cout << "setAllLeavesUnder: ";
-  // for( set<TreeNode *> :: iterator it = setAllLeavesUnder.begin(); it !=
-  // setAllLeavesUnder.end(); ++it)
-  //{
-  //(*it)->Dump();
-  //}
-  // cout << endl;
-  set<TreeNode *> setAllDescUnder;
-  pSubtree->GetAllDescendents(setAllDescUnder);
-  // cout << "setAllDescUnder: ";
-  // for( set<TreeNode *> :: iterator it = setAllDescUnder.begin(); it !=
-  // setAllDescUnder.end(); ++it)
-  //{
-  //(*it)->Dump();
-  //}
-  // cout << endl;
-
-  // detach all leaves from their parent
-  for (set<TreeNode *>::iterator it = setAllLeavesUnder.begin();
-       it != setAllLeavesUnder.end(); ++it) {
-    //
-    TreeNode *ppar = (*it)->GetParent();
-    ppar->RemoveChild(*it);
-  }
-
-  pSubtree->RemoveAllChildren();
-
-  // remove all descendent except the leaves
-  for (set<TreeNode *>::iterator it = setAllDescUnder.begin();
-       it != setAllDescUnder.end(); ++it) {
-    // need to be careful b/c node deletion is recurisvely
-    if (setAllLeavesUnder.find(*it) == setAllLeavesUnder.end() &&
-        (*it) != pSubtree && ((*it)->GetParent() == pSubtree)) {
-      // cout << "Delete this node: ";
-      //(*it)->Dump();
-      delete *it;
-    }
-  }
-  // then add the leaves directly under the subtree root
-  for (set<TreeNode *>::iterator it = setAllLeavesUnder.begin();
-       it != setAllLeavesUnder.end(); ++it) {
-    vector<int> lblEmpty;
-    pSubtree->AddChild(*it, lblEmpty);
-  }
-  // string strTree;
-  // ConsNewick(strTree);
-  // cout << "After MakeSubtreeUnrefiined: tree is " << strTree << endl;
-}
-
-void PhylogenyTreeBasic ::Binarize() {
-  // make the tree binary
-  int idToUseNext = this->rootNode->GetMaxIdWithinSubtree() + 1;
-  this->rootNode->Binarize(idToUseNext);
-  // string strTree;
-  // ConsNewick(strTree);
-  // cout << "After binarization: tree is " << strTree << endl;
-}
-
-void PhylogenyTreeBasic ::CreatePhyTreeFromLeavesWithLabels(
-    const set<string> &setLeafLabels, PhylogenyTreeBasic &treeSubsetLeaves,
-    bool fUseOldTaxonName) {
-  // given a set of leaf labels, construct another phylogenetic tree that is
-  // extracted from the current tree by only taking those leaves with one of the
-  // given labels YW: caution: all taxa names are mapped to 0,1,2,... according
-  // to their order in list if fUseOldTaxonName=false otherwise, keep the
-  // original flag
-  set<int> setSubsetLeaves;
-  map<int, string> mapOrigIdToOrigStrLbl;
-  int idToUseFirst = 0;
-  for (set<string>::const_iterator it = setLeafLabels.begin();
-       it != setLeafLabels.end(); ++it) {
-    string lblcur = *it;
-    set<int> setSubsetLeavesStep;
-    GetLeavesIdsWithLabel(lblcur, setSubsetLeavesStep);
-    // cout << "CreatePhyTreeFromLeavesWithLabels: lblcur: " << lblcur <<"
-    // setSubsetLeavesStep: "; DumpIntSet(setSubsetLeavesStep);
-    UnionSets(setSubsetLeaves, setSubsetLeavesStep);
-
-    string lblToUse = lblcur;
-    if (fUseOldTaxonName == false) {
-      char buf[100];
-      sprintf(buf, "%d", idToUseFirst++);
-      lblToUse = buf;
-    }
-    for (set<int>::iterator it2 = setSubsetLeavesStep.begin();
-         it2 != setSubsetLeavesStep.end(); ++it2) {
-      // mapOrigIdToOrigStrLbl.insert( map<int,string> :: value_type(*it2,
-      // lblcur) );
-      mapOrigIdToOrigStrLbl.insert(
-          map<int, string>::value_type(*it2, lblToUse));
-      // cout << "mapOrigIdToOrigStrLbl: " << *it2 << ", lblToUse: " << lblToUse
-      // << endl;
-    }
-  }
-
-  // get all clades first
-  set<set<int> > setClades;
-  GetAllCladesById(setClades);
-  // cout << "All clades: \n";
-  // for(set<set<int> > :: iterator it = setClades.begin(); it !=
-  // setClades.end(); ++it)
-  //{
-  // DumpIntSet(*it);
-  //}
-
-  // map the remaining id to 0,1,2....
-  map<int, int> mapIdToContinue;
-  map<int, string> mapContIdToOrigStr;
-  int idToUse = 0;
-  for (set<int>::iterator it = setSubsetLeaves.begin();
-       it != setSubsetLeaves.end(); ++it) {
-    YW_ASSERT_INFO(
-        mapOrigIdToOrigStrLbl.find(*it) != mapOrigIdToOrigStrLbl.end(), "Fail");
-    mapContIdToOrigStr.insert(
-        map<int, string>::value_type(idToUse, mapOrigIdToOrigStrLbl[*it]));
-    // cout << "mapContIdToOrigStr: idtouse: " << idToUse << ", string orig: "
-    // << mapOrigIdToOrigStrLbl[*it] << endl;
-    mapIdToContinue.insert(map<int, int>::value_type(*it, idToUse++));
-  }
-
-  set<set<int> > setCladesSub;
-  // now extract those with only those given
-  for (set<set<int> >::iterator it = setClades.begin(); it != setClades.end();
-       ++it) {
-    set<int> sintstep;
-    JoinSets(*it, setSubsetLeaves, sintstep);
-    if (sintstep.size() > 0) {
-      // convert to continuios id first
-      set<int> sintstep2;
-      MapIntSetTo(sintstep, mapIdToContinue, sintstep2);
-
-      setCladesSub.insert(sintstep2);
-
-      // cout << "Adding a clade: ";
-      // DumpIntSet( sintstep2);
-      // cout << "for orig clade: ";
-      // DumpIntSet(sintstep);
-    }
-  }
-
-  // now build a tree with these labels
-  CreatePhyTreeWithRootedSplits(treeSubsetLeaves, setSubsetLeaves.size(),
-                                setCladesSub);
-
-  // now map the leaves of the new tree to the original ids
-  treeSubsetLeaves.AssignLeafLabels(mapContIdToOrigStr);
-
-  // cout << "This is the phylogenetic tree constructed from subset of leaves: "
-  // this->OutputGML("tree1.gml");
-  // treeSubsetLeaves.OutputGML("t1.gml");
-  // exit(1);
-}
-
-void PhylogenyTreeBasic ::AssignLeafLabels(
-    const map<int, string> &mapLeafLbls) {
-  // assign labels stored in the map (format: node id to lbl)
-  vector<TreeNode *> listLeafNodes;
-  GetAllLeafNodes(listLeafNodes);
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    int idn = listLeafNodes[i]->GetID();
-    map<int, string>::const_iterator itg = mapLeafLbls.find(idn);
-    YW_ASSERT_INFO(itg != mapLeafLbls.end(), "Fail");
-    string strLblNew = itg->second;
-    listLeafNodes[i]->SetLabel(strLblNew);
-    listLeafNodes[i]->SetUserLabel(strLblNew);
-  }
-}
-void PhylogenyTreeBasic ::ReassignLeafLabels(
-    const map<string, string> &mapLeafLbls) {
-  vector<TreeNode *> listLeafNodes;
-  GetAllLeafNodes(listLeafNodes);
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    string str = listLeafNodes[i]->GetLabel();
-    // cout << "leaf label curr: " << str << endl;
-    map<string, string>::const_iterator itg = mapLeafLbls.find(str);
-
-    if (itg == mapLeafLbls.end()) {
-      // TBD. YW: for now. Need to look at later...
-      continue;
-    }
-
-    YW_ASSERT_INFO(itg != mapLeafLbls.end(), "Fail");
-    string strLblNew = itg->second;
-    listLeafNodes[i]->SetLabel(strLblNew);
-    listLeafNodes[i]->SetUserLabel(strLblNew);
-  }
-}
-
-void PhylogenyTreeBasic ::SetUserLabelToCurrLabels() {
-  vector<TreeNode *> listLeafNodes;
-  GetAllLeafNodes(listLeafNodes);
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    listLeafNodes[i]->SetUserLabel(listLeafNodes[i]->GetLabel());
-  }
-}
-
-void PhylogenyTreeBasic ::SetLabelsToCurrUserLabels() {
-  vector<TreeNode *> listLeafNodes;
-  GetAllLeafNodes(listLeafNodes);
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    listLeafNodes[i]->SetLabel(listLeafNodes[i]->GetUserLabel());
-  }
-}
-
-int PhylogenyTreeBasic ::GetMaxDegree() const {
-  int res = 0;
-
-  PhylogenyTreeBasic &thisTree = const_cast<PhylogenyTreeBasic &>(*this);
-  PhylogenyTreeIterator itor(thisTree);
-  itor.Init();
-  while (itor.IsDone() == false) {
-    TreeNode *pn = itor.GetCurrNode();
-
-    int degThis = pn->GetChildrenNum();
-    if (degThis > res) {
-      res = degThis;
-    }
-
-    itor.Next();
-  }
-  return res;
-}
-
-void PhylogenyTreeBasic ::Dump() const {
-  // dump all nodes
-  PhylogenyTreeBasic &thisTree = const_cast<PhylogenyTreeBasic &>(*this);
-  PhylogenyTreeIterator itor(thisTree);
-  itor.Init();
-  while (itor.IsDone() == false) {
-    TreeNode *pn = itor.GetCurrNode();
-    pn->Dump();
-    cout << endl;
-    itor.Next();
-  }
+void PhylogenyTreeBasic ::Dump() const
+{
+    // dump all nodes
+    PhylogenyTreeBasic &thisTree = const_cast<PhylogenyTreeBasic &>(*this);
+    PhylogenyTreeIterator itor(thisTree);
+    itor.Init();
+    while (itor.IsDone() == false)
+    {
+        TreeNode *pn = itor.GetCurrNode();
+        pn->Dump();
+        cout << endl;
+        itor.Next();
+    }
 }
 
-void PhylogenyTreeBasic ::GetSubtreesWithMaxSize(set<TreeNode *> &setSTRoots,
-                                                 int maxSzSubtree) const {
+void PhylogenyTreeBasic ::GetSubtreesWithMaxSize(set<TreeNode *> &setSTRoots, int maxSzSubtree) const
+{
 #if 0
     // YW: this piece of code is not used
     set<TreeNode *> setSTRootsStep;
@@ -4783,86 +5229,99 @@ cout << endl;
     GetSubtreesWithMaxSizeExcludeTaxa( setSTRoots, maxSzSubtree, setLblsExc);
 #endif
 
-  //#if 0
-  // retrieve roots of subtrees that are no biggere than the specified size
-  setSTRoots.clear();
-  stack<TreeNode *> stackTrNodes;
-  stackTrNodes.push(this->GetRoot());
+    //#if 0
+    // retrieve roots of subtrees that are no biggere than the specified size
+    setSTRoots.clear();
+    stack<TreeNode *> stackTrNodes;
+    stackTrNodes.push(this->GetRoot());
+
+    while (stackTrNodes.size() > 0)
+    {
+        //
+        TreeNode *pncurr = stackTrNodes.top();
+        stackTrNodes.pop();
+
+        // save it if this subtree size is not too big
+        set<TreeNode *> setDescendents;
+        pncurr->GetAllLeavesUnder(setDescendents);
+        //cout << "pncur: number of descendents: " << setDescendents.size() << " ";
+        //pncurr->Dump();
+        if ((int)setDescendents.size() <= maxSzSubtree)
+        {
+            //cout << "Adding tis node.\n";
+            setSTRoots.insert(pncurr);
+        }
+        else
+        {
+            //cout << "Process each of its descendents.\n";
+            // then check all its descendents
+            for (int i = 0; i < pncurr->GetChildrenNum(); ++i)
+            {
+                TreeNode *pnc = pncurr->GetChild(i);
+                stackTrNodes.push(pnc);
+                //cout << "pushing child: ";
+                //pnc->Dump();
+            }
+        }
+    }
+    //#endif
+}
+
+void PhylogenyTreeBasic ::GetMaxSubtrees(set<TreeNode *> &setSTRootsIdents)
+{
+    // obtain max subtrees with identical leaf labels
+    setSTRootsIdents.clear();
+    stack<TreeNode *> stackNodes;
+    stackNodes.push(GetRoot());
+    while (stackNodes.empty() == false)
+    {
+        //
+        TreeNode *pncurr = stackNodes.top();
+        stackNodes.pop();
+
+        vector<string> strLblLeaves;
+        pncurr->GetAllLeafLabeles(strLblLeaves);
+        set<string> strLblLeavesSet;
+        PopulateSetByVecGen(strLblLeavesSet, strLblLeaves);
+        YW_ASSERT_INFO(strLblLeavesSet.size() >= 1, "Must have at least one label");
+        if (strLblLeavesSet.size() == 1)
+        {
+            //
+            setSTRootsIdents.insert(pncurr);
+        }
+        else
+        {
+            // consider all children
+            for (int i = 0; i < pncurr->GetChildrenNum(); ++i)
+            {
+                stackNodes.push(pncurr->GetChild(i));
+            }
+        }
+    }
+}
+
+bool PhylogenyTreeBasic ::GetSiblingsPairFrom(const set<TreeNode *> &setNodesToChoose, pair<TreeNode *, TreeNode *> &pairSibs)
+{
+    // find which pairs of given nodes have the same paent
+    bool fres = false;
 
-  while (stackTrNodes.size() > 0) {
-    //
-    TreeNode *pncurr = stackTrNodes.top();
-    stackTrNodes.pop();
-
-    // save it if this subtree size is not too big
-    set<TreeNode *> setDescendents;
-    pncurr->GetAllLeavesUnder(setDescendents);
-    // cout << "pncur: number of descendents: " << setDescendents.size() << " ";
-    // pncurr->Dump();
-    if ((int)setDescendents.size() <= maxSzSubtree) {
-      // cout << "Adding tis node.\n";
-      setSTRoots.insert(pncurr);
-    } else {
-      // cout << "Process each of its descendents.\n";
-      // then check all its descendents
-      for (int i = 0; i < pncurr->GetChildrenNum(); ++i) {
-        TreeNode *pnc = pncurr->GetChild(i);
-        stackTrNodes.push(pnc);
-        // cout << "pushing child: ";
-        // pnc->Dump();
-      }
-    }
-  }
-  //#endif
-}
-
-void PhylogenyTreeBasic ::GetMaxSubtrees(set<TreeNode *> &setSTRootsIdents) {
-  // obtain max subtrees with identical leaf labels
-  setSTRootsIdents.clear();
-  stack<TreeNode *> stackNodes;
-  stackNodes.push(GetRoot());
-  while (stackNodes.empty() == false) {
     //
-    TreeNode *pncurr = stackNodes.top();
-    stackNodes.pop();
-
-    vector<string> strLblLeaves;
-    pncurr->GetAllLeafLabeles(strLblLeaves);
-    set<string> strLblLeavesSet;
-    PopulateSetByVecGen(strLblLeavesSet, strLblLeaves);
-    YW_ASSERT_INFO(strLblLeavesSet.size() >= 1, "Must have at least one label");
-    if (strLblLeavesSet.size() == 1) {
-      //
-      setSTRootsIdents.insert(pncurr);
-    } else {
-      // consider all children
-      for (int i = 0; i < pncurr->GetChildrenNum(); ++i) {
-        stackNodes.push(pncurr->GetChild(i));
-      }
-    }
-  }
-}
-
-bool PhylogenyTreeBasic ::GetSiblingsPairFrom(
-    const set<TreeNode *> &setNodesToChoose,
-    pair<TreeNode *, TreeNode *> &pairSibs) {
-  // find which pairs of given nodes have the same paent
-  bool fres = false;
-
-  //
-  map<TreeNode *, TreeNode *> mapParToOrigNode;
-  for (set<TreeNode *>::const_iterator it = setNodesToChoose.begin();
-       it != setNodesToChoose.end(); ++it) {
-    TreeNode *pp = (*it)->GetParent();
-    if (mapParToOrigNode.find(pp) == mapParToOrigNode.end()) {
-      mapParToOrigNode.insert(map<TreeNode *, TreeNode *>::value_type(pp, *it));
-    } else {
-      pairSibs.first = mapParToOrigNode[pp];
-      pairSibs.second = *it;
-      fres = true;
-      break;
-    }
-  }
+    map<TreeNode *, TreeNode *> mapParToOrigNode;
+    for (set<TreeNode *>::const_iterator it = setNodesToChoose.begin(); it != setNodesToChoose.end(); ++it)
+    {
+        TreeNode *pp = (*it)->GetParent();
+        if (mapParToOrigNode.find(pp) == mapParToOrigNode.end())
+        {
+            mapParToOrigNode.insert(map<TreeNode *, TreeNode *>::value_type(pp, *it));
+        }
+        else
+        {
+            pairSibs.first = mapParToOrigNode[pp];
+            pairSibs.second = *it;
+            fres = true;
+            break;
+        }
+    }
 #if 0
 cout << "GetSiblingsPairFrom: \n";
 if( fres == true )
@@ -4874,54 +5333,54 @@ pairSibs.second->Dump();
 }
 #endif
 
-  return fres;
-}
-
-bool PhylogenyTreeBasic ::GetSiblingsNodesFrom(
-    const set<TreeNode *> &setNodesToChoose, set<TreeNode *> &setSibs) {
-  // find which nodes from given nodes have the same paent
-  // YW: we prefer the lower if there are multiple choices
-  bool fres = false;
-
-  //
-  map<TreeNode *, set<TreeNode *> > mapParToOrigNode;
-  for (set<TreeNode *>::const_iterator it = setNodesToChoose.begin();
-       it != setNodesToChoose.end(); ++it) {
-    TreeNode *pp = (*it)->GetParent();
-    if (mapParToOrigNode.find(pp) == mapParToOrigNode.end()) {
-      set<TreeNode *> ss;
-      mapParToOrigNode.insert(
-          map<TreeNode *, set<TreeNode *> >::value_type(pp, ss));
-    }
-    mapParToOrigNode[pp].insert(*it);
-  }
-  // assign one with at least two nodes
-  for (map<TreeNode *, set<TreeNode *> >::iterator it =
-           mapParToOrigNode.begin();
-       it != mapParToOrigNode.end(); ++it) {
-    if (it->second.size() > 1) {
-      bool fGood = true;
-
-      for (map<TreeNode *, set<TreeNode *> >::iterator it2 =
-               mapParToOrigNode.begin();
-           it2 != mapParToOrigNode.end(); ++it2) {
-        //
-        int dummy;
-        if (it->first != it2->first &&
-            (it->first)->IsAncesterOf(it2->first, dummy) == true) {
-          // this one is not lowest
-          fGood = false;
-          break;
+    return fres;
+}
+
+bool PhylogenyTreeBasic ::GetSiblingsNodesFrom(const set<TreeNode *> &setNodesToChoose, set<TreeNode *> &setSibs)
+{
+    // find which nodes from given nodes have the same paent
+    // YW: we prefer the lower if there are multiple choices
+    bool fres = false;
+
+    //
+    map<TreeNode *, set<TreeNode *>> mapParToOrigNode;
+    for (set<TreeNode *>::const_iterator it = setNodesToChoose.begin(); it != setNodesToChoose.end(); ++it)
+    {
+        TreeNode *pp = (*it)->GetParent();
+        if (mapParToOrigNode.find(pp) == mapParToOrigNode.end())
+        {
+            set<TreeNode *> ss;
+            mapParToOrigNode.insert(map<TreeNode *, set<TreeNode *>>::value_type(pp, ss));
         }
-      }
+        mapParToOrigNode[pp].insert(*it);
+    }
+    // assign one with at least two nodes
+    for (map<TreeNode *, set<TreeNode *>>::iterator it = mapParToOrigNode.begin(); it != mapParToOrigNode.end(); ++it)
+    {
+        if (it->second.size() > 1)
+        {
+            bool fGood = true;
+
+            for (map<TreeNode *, set<TreeNode *>>::iterator it2 = mapParToOrigNode.begin(); it2 != mapParToOrigNode.end(); ++it2)
+            {
+                //
+                int dummy;
+                if (it->first != it2->first && (it->first)->IsAncesterOf(it2->first, dummy) == true)
+                {
+                    // this one is not lowest
+                    fGood = false;
+                    break;
+                }
+            }
 
-      if (fGood) {
-        setSibs = it->second;
-        fres = true;
-        break;
-      }
+            if (fGood)
+            {
+                setSibs = it->second;
+                fres = true;
+                break;
+            }
+        }
     }
-  }
 
 #if 0
     cout << "GetSiblingsPairFrom: \n";
@@ -4934,51 +5393,49 @@ bool PhylogenyTreeBasic ::GetSiblingsNodesFrom(
     }
 #endif
 
-  return fres;
+    return fres;
 }
 
-void PhylogenyTreeBasic ::FindAllLabelsInSubtrees(
-    const set<TreeNode *> &setSTRoots, set<string> &setLabels) {
-  // get all labels
-  setLabels.clear();
-  for (set<TreeNode *>::const_iterator it = setSTRoots.begin();
-       it != setSTRoots.end(); ++it) {
-    set<string> setLblsCoveredStep;
-    (*it)->GetAllDistinctLeafLabeles(setLblsCoveredStep);
-    UnionSetsGen(setLabels, setLblsCoveredStep);
-  }
+void PhylogenyTreeBasic ::FindAllLabelsInSubtrees(const set<TreeNode *> &setSTRoots, set<string> &setLabels)
+{
+    // get all labels
+    setLabels.clear();
+    for (set<TreeNode *>::const_iterator it = setSTRoots.begin(); it != setSTRoots.end(); ++it)
+    {
+        set<string> setLblsCoveredStep;
+        (*it)->GetAllDistinctLeafLabeles(setLblsCoveredStep);
+        UnionSetsGen(setLabels, setLblsCoveredStep);
+    }
 }
 
-void PhylogenyTreeBasic ::FindDescendentsOfNodeWithin(
-    TreeNode *pAnc, const set<TreeNode *> &setNodesToChoose,
-    set<TreeNode *> &setDescendents) {
-  //
-  setDescendents.clear();
-  for (set<TreeNode *>::const_iterator itg = setNodesToChoose.begin();
-       itg != setNodesToChoose.end(); ++itg) {
-    int dummy;
-    if (pAnc->IsAncesterOf(*itg, dummy) == true) {
-      setDescendents.insert(*itg);
+void PhylogenyTreeBasic ::FindDescendentsOfNodeWithin(TreeNode *pAnc, const set<TreeNode *> &setNodesToChoose, set<TreeNode *> &setDescendents)
+{
+    //
+    setDescendents.clear();
+    for (set<TreeNode *>::const_iterator itg = setNodesToChoose.begin(); itg != setNodesToChoose.end(); ++itg)
+    {
+        int dummy;
+        if (pAnc->IsAncesterOf(*itg, dummy) == true)
+        {
+            setDescendents.insert(*itg);
+        }
     }
-  }
 }
 
-bool PhylogenyTreeBasic ::TestIsomorphic(
-    PhylogenyTreeBasic &treeOther,
-    map<TreeNode *, TreeNode *> &mapOldNodeToNew) const {
+bool PhylogenyTreeBasic ::TestIsomorphic(PhylogenyTreeBasic &treeOther, map<TreeNode *, TreeNode *> &mapOldNodeToNew) const
+{
 #if 0
 cout << "TestIsomorphic: current tree: ";
 this->Dump();
 cout << "treeOther: ";
 treeOther.Dump();
 #endif
-  // return true if isomorphic (and set the mapping between the leaf nodes
-  // collect shape label of two trees. Here, we map each current tree node to
-  // the corresponding one of the other
-  PhylogenyTreeBasic *pthis = const_cast<PhylogenyTreeBasic *>(this);
-  set<int> lvidsThis, lvidsOther;
-  pthis->GetLeaveIds(lvidsThis);
-  treeOther.GetLeaveIds(lvidsOther);
+    // return true if isomorphic (and set the mapping between the leaf nodes
+    // collect shape label of two trees. Here, we map each current tree node to the corresponding one of the other
+    PhylogenyTreeBasic *pthis = const_cast<PhylogenyTreeBasic *>(this);
+    set<int> lvidsThis, lvidsOther;
+    pthis->GetLeaveIds(lvidsThis);
+    treeOther.GetLeaveIds(lvidsOther);
 #if 0
 cout << "lvidsThis:";
 DumpIntSet(lvidsThis);
@@ -4986,90 +5443,86 @@ cout << "lvidsOther:";
 DumpIntSet(lvidsOther);
 #endif
 
-  map<TreeNode *, string> mapNodeShapeThis, mapNodeShapeOther;
-  vector<TreeNode *> listNodesThis, listNodesOther;
-  GetAllNodes(listNodesThis);
-  treeOther.GetAllNodes(listNodesOther);
-  for (int i = 0; i < (int)listNodesThis.size(); ++i) {
-    string strShape = listNodesThis[i]->GetShapeLabel(lvidsThis, true);
-    mapNodeShapeThis.insert(
-        map<TreeNode *, string>::value_type(listNodesThis[i], strShape));
-    // cout << "Find a shape (this):" << strShape << endl;
-  }
-  for (int i = 0; i < (int)listNodesOther.size(); ++i) {
-    string strShape = listNodesOther[i]->GetShapeLabel(lvidsOther, true);
-    mapNodeShapeOther.insert(
-        map<TreeNode *, string>::value_type(listNodesOther[i], strShape));
-    // cout << "Find a shape (other):" << strShape << endl;
-  }
-  if (mapNodeShapeThis[GetRoot()] != mapNodeShapeOther[treeOther.GetRoot()]) {
-    // cout << "Root label mismatch: " << mapNodeShapeThis[ GetRoot()]  << " vs
-    // " << mapNodeShapeOther[ treeOther.GetRoot() ]  << endl;
-    return false; // not isomorphic if the root symbol is not isomorhphic
-  }
-  // we also list the matching nodes of each node (incl. internal)
-  mapOldNodeToNew.clear();
-  mapOldNodeToNew.insert(
-      map<TreeNode *, TreeNode *>::value_type(GetRoot(), treeOther.GetRoot()));
-  stack<TreeNode *> stackNodesToProc;
-  stackNodesToProc.push(GetRoot());
-  while (stackNodesToProc.empty() == false) {
-    TreeNode *pnCurrOld = stackNodesToProc.top();
-    stackNodesToProc.pop();
+    map<TreeNode *, string> mapNodeShapeThis, mapNodeShapeOther;
+    vector<TreeNode *> listNodesThis, listNodesOther;
+    GetAllNodes(listNodesThis);
+    treeOther.GetAllNodes(listNodesOther);
+    for (int i = 0; i < (int)listNodesThis.size(); ++i)
+    {
+        string strShape = listNodesThis[i]->GetShapeLabel(lvidsThis, true);
+        mapNodeShapeThis.insert(map<TreeNode *, string>::value_type(listNodesThis[i], strShape));
+        //cout << "Find a shape (this):" << strShape << endl;
+    }
+    for (int i = 0; i < (int)listNodesOther.size(); ++i)
+    {
+        string strShape = listNodesOther[i]->GetShapeLabel(lvidsOther, true);
+        mapNodeShapeOther.insert(map<TreeNode *, string>::value_type(listNodesOther[i], strShape));
+        //cout << "Find a shape (other):" << strShape << endl;
+    }
+    if (mapNodeShapeThis[GetRoot()] != mapNodeShapeOther[treeOther.GetRoot()])
+    {
+        //cout << "Root label mismatch: " << mapNodeShapeThis[ GetRoot()]  << " vs " << mapNodeShapeOther[ treeOther.GetRoot() ]  << endl;
+        return false; // not isomorphic if the root symbol is not isomorhphic
+    }
+    // we also list the matching nodes of each node (incl. internal)
+    mapOldNodeToNew.clear();
+    mapOldNodeToNew.insert(map<TreeNode *, TreeNode *>::value_type(GetRoot(), treeOther.GetRoot()));
+    stack<TreeNode *> stackNodesToProc;
+    stackNodesToProc.push(GetRoot());
+    while (stackNodesToProc.empty() == false)
+    {
+        TreeNode *pnCurrOld = stackNodesToProc.top();
+        stackNodesToProc.pop();
 #if 0
 cout << "Processing node: ";
 pnCurrOld->Dump();
 cout << endl;
 #endif
-    // get all children
-    set<TreeNode *> setChildren;
-    pnCurrOld->GetAllChildren(setChildren);
-    map<string, set<TreeNode *> > setChildrenShape;
-    for (set<TreeNode *>::iterator it = setChildren.begin();
-         it != setChildren.end(); ++it) {
-      TreeNode *pchild = *it;
-      string strchild = mapNodeShapeThis[pchild];
-      if (setChildrenShape.find(strchild) == setChildrenShape.end()) {
-        set<TreeNode *> ss;
-        setChildrenShape.insert(
-            map<string, set<TreeNode *> >::value_type(strchild, ss));
-      }
-      setChildrenShape[strchild].insert(pchild);
+        // get all children
+        set<TreeNode *> setChildren;
+        pnCurrOld->GetAllChildren(setChildren);
+        map<string, set<TreeNode *>> setChildrenShape;
+        for (set<TreeNode *>::iterator it = setChildren.begin(); it != setChildren.end(); ++it)
+        {
+            TreeNode *pchild = *it;
+            string strchild = mapNodeShapeThis[pchild];
+            if (setChildrenShape.find(strchild) == setChildrenShape.end())
+            {
+                set<TreeNode *> ss;
+                setChildrenShape.insert(map<string, set<TreeNode *>>::value_type(strchild, ss));
+            }
+            setChildrenShape[strchild].insert(pchild);
 #if 0
 cout << "Adding a string:node pair: " << strchild << ": node: ";
 pchild->Dump();
 cout << endl;
 #endif
-      // also save for more processing
-      stackNodesToProc.push(pchild);
-    }
-    // now find the matching one
-    set<TreeNode *> setChildOther;
-    YW_ASSERT_INFO(mapOldNodeToNew.find(pnCurrOld) != mapOldNodeToNew.end(),
-                   "Fai to find1");
-    TreeNode *pnCurrOther = mapOldNodeToNew[pnCurrOld];
-    pnCurrOther->GetAllChildren(setChildOther);
+            // also save for more processing
+            stackNodesToProc.push(pchild);
+        }
+        // now find the matching one
+        set<TreeNode *> setChildOther;
+        YW_ASSERT_INFO(mapOldNodeToNew.find(pnCurrOld) != mapOldNodeToNew.end(), "Fai to find1");
+        TreeNode *pnCurrOther = mapOldNodeToNew[pnCurrOld];
+        pnCurrOther->GetAllChildren(setChildOther);
 #if 0
 cout << "Now check the matching other: ";
 pnCurrOther->Dump();
 cout << endl;
 #endif
-    for (set<TreeNode *>::iterator it = setChildOther.begin();
-         it != setChildOther.end(); ++it) {
-      TreeNode *pchildother = *it;
-      string strchildother = mapNodeShapeOther[pchildother];
+        for (set<TreeNode *>::iterator it = setChildOther.begin(); it != setChildOther.end(); ++it)
+        {
+            TreeNode *pchildother = *it;
+            string strchildother = mapNodeShapeOther[pchildother];
 #if 0
 cout << "child(other): ";
 pchildother->Dump();
 cout << ": stringshape: " << strchildother << endl;
 #endif
-      YW_ASSERT_INFO(setChildrenShape.find(strchildother) !=
-                             setChildrenShape.end() &&
-                         setChildrenShape[strchildother].size() > 0,
-                     "Fail to find2");
-      // assign to the first one in the list
-      TreeNode *pnmatch = *(setChildrenShape[strchildother].begin());
-      setChildrenShape[strchildother].erase(pnmatch);
+            YW_ASSERT_INFO(setChildrenShape.find(strchildother) != setChildrenShape.end() && setChildrenShape[strchildother].size() > 0, "Fail to find2");
+            // assign to the first one in the list
+            TreeNode *pnmatch = *(setChildrenShape[strchildother].begin());
+            setChildrenShape[strchildother].erase(pnmatch);
 #if 0
 cout << "Matching: pncurold: ";
 pnCurrOld->Dump();
@@ -5077,11 +5530,10 @@ cout << " to pnmatch:";
 pnmatch->Dump();
 cout << endl;
 #endif
-      // remember the matching
-      mapOldNodeToNew.insert(
-          map<TreeNode *, TreeNode *>::value_type(pnmatch, pchildother));
+            // remember the matching
+            mapOldNodeToNew.insert(map<TreeNode *, TreeNode *>::value_type(pnmatch, pchildother));
+        }
     }
-  }
 #if 0
 cout << "mapOldNodeToNew: \n";
 for( map<TreeNode*, TreeNode*> :: iterator it = mapOldNodeToNew.begin(); it != mapOldNodeToNew.end(); ++it)
@@ -5094,96 +5546,112 @@ cout << endl;
 }
 #endif
 
-  return true;
+    return true;
 }
 
-PhylogenyTreeBasic *ConsPhyTreeSubsetTaxa(PhylogenyTreeBasic *ptreeIn,
-                                          const set<int> &setTaxaKept) {
-  // construct a phylogeny tree by keeping subset of taxa
-  PhylogenyTreeBasic *pCopy = ptreeIn->Copy();
-  vector<TreeNode *> listLeafNodes;
-  pCopy->GetAllLeafNodes(listLeafNodes);
+PhylogenyTreeBasic *ConsPhyTreeSubsetTaxa(PhylogenyTreeBasic *ptreeIn, const set<int> &setTaxaKept)
+{
+    // construct a phylogeny tree by keeping subset of taxa
+    PhylogenyTreeBasic *pCopy = ptreeIn->Copy();
+    vector<TreeNode *> listLeafNodes;
+    pCopy->GetAllLeafNodes(listLeafNodes);
 
-  for (int i = 0; i < (int)listLeafNodes.size(); ++i) {
-    int lbl = listLeafNodes[i]->GetIntLabel();
-    if (setTaxaKept.find(lbl) == setTaxaKept.end()) {
-      // remove this node
-      TreeNode *pParOrig = listLeafNodes[i]->GetParent();
-      pCopy->RemoveNode(listLeafNodes[i]);
-      pCopy->RemoveDegreeOneNodeAt(pParOrig);
+    for (int i = 0; i < (int)listLeafNodes.size(); ++i)
+    {
+        int lbl = listLeafNodes[i]->GetIntLabel();
+        if (setTaxaKept.find(lbl) == setTaxaKept.end())
+        {
+            // remove this node
+            TreeNode *pParOrig = listLeafNodes[i]->GetParent();
+            pCopy->RemoveNode(listLeafNodes[i]);
+            pCopy->RemoveDegreeOneNodeAt(pParOrig);
+        }
     }
-  }
-  // pCopy->RemoveDegreeOneNodes();
+    //pCopy->RemoveDegreeOneNodes();
 
-  return pCopy;
+    return pCopy;
 }
 
 // implement needed
-string ConsEdgeLabeTreeSeg(const string &strNWWithLabels, int regBeg,
-                           int regEnd) {
-  // cout << "ConsEdgeLabeTreeSeg: [" << regBeg << "," << regEnd << "]: \n";
-  // if there is edge outside any parenthesis, keep it
-  int posRightParenths = regEnd;
-  while (posRightParenths > 0 && strNWWithLabels[posRightParenths] != ')') {
-    --posRightParenths;
-  }
-  string strChild;
-  if (posRightParenths > 0) {
-    // search for children, perform search for each segment between separator ,
-    // (on the same level)
-    vector<string> listChildStrs;
-    int level = 0;
-    int regChildStart = regBeg + 1;
-    for (int p = regBeg + 1; p <= posRightParenths - 1; ++p) {
-      if ((strNWWithLabels[p] == ',' || p == posRightParenths - 1) &&
-          level == 0) {
-        int regChildEnd = p - 1;
-        if (p == posRightParenths - 1) {
-          regChildEnd = p;
-        }
-        string strChildStep =
-            ConsEdgeLabeTreeSeg(strNWWithLabels, regChildStart, regChildEnd);
-        if (strChildStep.length() > 0) {
-          listChildStrs.push_back(strChildStep);
-        }
-        regChildStart = p + 1;
-      } else if (strNWWithLabels[p] == '(') {
-        ++level;
-      } else if (strNWWithLabels[p] == ')') {
-        --level;
-      }
-    }
-    if (listChildStrs.size() > 0) {
-      strChild = "(";
-      for (int i = 0; i < (int)listChildStrs.size(); ++i) {
-        strChild += listChildStrs[i];
-        if (i < (int)listChildStrs.size() - 1) {
-          strChild += ",";
-        }
-      }
-      strChild += ")";
-    }
-  }
-
-  string strEdgeLbelCur;
-  if (regEnd != posRightParenths) {
-    // search for :
-    int pos = regEnd;
-    while (pos >= regBeg && strNWWithLabels[pos] != ':') {
-      --pos;
-    }
-    if (pos >= regBeg) {
-      strEdgeLbelCur = strNWWithLabels.substr(pos + 1, regEnd - pos);
-    }
-  }
-  string strRes = strChild + strEdgeLbelCur;
-  // cout << "strRes: " << strRes << endl;
-  return strRes;
-}
-
-string ConsEdgeLabeTree(const string &strNWWithLabels) {
-  // construct newick format of edge label tree; that is,
-  // delete all taxa, only leave edge label
-  // e.g. ((2,4:#4):#3,(3:#5,5):#2,1):#1  ==> ((#4)#3,(#5)#2)#1
-  return ConsEdgeLabeTreeSeg(strNWWithLabels, 0, strNWWithLabels.length() - 1);
+string ConsEdgeLabeTreeSeg(const string &strNWWithLabels, int regBeg, int regEnd)
+{
+    //cout << "ConsEdgeLabeTreeSeg: [" << regBeg << "," << regEnd << "]: \n";
+    // if there is edge outside any parenthesis, keep it
+    int posRightParenths = regEnd;
+    while (posRightParenths > 0 && strNWWithLabels[posRightParenths] != ')')
+    {
+        --posRightParenths;
+    }
+    string strChild;
+    if (posRightParenths > 0)
+    {
+        // search for children, perform search for each segment between separator , (on the same level)
+        vector<string> listChildStrs;
+        int level = 0;
+        int regChildStart = regBeg + 1;
+        for (int p = regBeg + 1; p <= posRightParenths - 1; ++p)
+        {
+            if ((strNWWithLabels[p] == ',' || p == posRightParenths - 1) && level == 0)
+            {
+                int regChildEnd = p - 1;
+                if (p == posRightParenths - 1)
+                {
+                    regChildEnd = p;
+                }
+                string strChildStep = ConsEdgeLabeTreeSeg(strNWWithLabels, regChildStart, regChildEnd);
+                if (strChildStep.length() > 0)
+                {
+                    listChildStrs.push_back(strChildStep);
+                }
+                regChildStart = p + 1;
+            }
+            else if (strNWWithLabels[p] == '(')
+            {
+                ++level;
+            }
+            else if (strNWWithLabels[p] == ')')
+            {
+                --level;
+            }
+        }
+        if (listChildStrs.size() > 0)
+        {
+            strChild = "(";
+            for (int i = 0; i < (int)listChildStrs.size(); ++i)
+            {
+                strChild += listChildStrs[i];
+                if (i < (int)listChildStrs.size() - 1)
+                {
+                    strChild += ",";
+                }
+            }
+            strChild += ")";
+        }
+    }
+
+    string strEdgeLbelCur;
+    if (regEnd != posRightParenths)
+    {
+        // search for :
+        int pos = regEnd;
+        while (pos >= regBeg && strNWWithLabels[pos] != ':')
+        {
+            --pos;
+        }
+        if (pos >= regBeg)
+        {
+            strEdgeLbelCur = strNWWithLabels.substr(pos + 1, regEnd - pos);
+        }
+    }
+    string strRes = strChild + strEdgeLbelCur;
+    //cout << "strRes: " << strRes << endl;
+    return strRes;
+}
+
+string ConsEdgeLabeTree(const string &strNWWithLabels)
+{
+    // construct newick format of edge label tree; that is,
+    // delete all taxa, only leave edge label
+    // e.g. ((2,4:#4):#3,(3:#5,5):#2,1):#1  ==> ((#4)#3,(#5)#2)#1
+    return ConsEdgeLabeTreeSeg(strNWWithLabels, 0, strNWWithLabels.length() - 1);
 }
diff --git a/trisicell/external/scistree/PhylogenyTreeBasic.h b/trisicell/external/scistree/PhylogenyTreeBasic.h
index 4c6f2e6..c6544ba 100644
--- a/trisicell/external/scistree/PhylogenyTreeBasic.h
+++ b/trisicell/external/scistree/PhylogenyTreeBasic.h
@@ -1,20 +1,20 @@
 #ifndef PHYLOGENY_TREE_BASIC_H
 #define PHYLOGENY_TREE_BASIC_H
 
-#include <cstdio>
-#include <fstream>
 #include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <vector>
 #include <set>
-#include <stack>
 #include <string>
-#include <vector>
+#include <stack>
 
-#include <cstdio>
-#include <cstdlib>
-#include <ctime>
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
+#include <ctime>
+#include <cstdlib>
+#include <cstdio>
 
 #include "Utils.h"
 
@@ -25,151 +25,142 @@ using namespace std;
 //*****************************************************************************
 
 // utilities for Newick format
-class NewickUtils {
+class NewickUtils
+{
 public:
-  NewickUtils() {}
+    NewickUtils() {}
 
-  static void RetrieveLabelSet(const string &strNW,
-                               multiset<string> &setLabels);
-  static bool FindSplitIn(const string &strNW, string &strPart1,
-                          string &strPart2);
-  static void UpdateLabells(string &strNW,
-                            const map<string, string> &mapOldLabelToNew);
-  static string RemoveBrLenFromTree(string &strNW);
-  static void ConsolidateSinglChildChain(string &strNW);
-  static double GetLenAt(const string &strNW, int posLen);
+    static void RetrieveLabelSet(const string &strNW, multiset<string> &setLabels);
+    static bool FindSplitIn(const string &strNW, string &strPart1, string &strPart2);
+    static void UpdateLabells(string &strNW, const map<string, string> &mapOldLabelToNew);
+    static string RemoveBrLenFromTree(string &strNW);
+    static void ConsolidateSinglChildChain(string &strNW);
+    static double GetLenAt(const string &strNW, int posLen);
 };
 
-// map between string-based taxa to integer based id (used internally by the
-// code)
-class TaxaMapper {
+// map between string-based taxa to integer based id (used internally by the code)
+class TaxaMapper
+{
 public:
-  //
-  TaxaMapper();
+    //
+    TaxaMapper();
 
-  // utility
-  bool IsInitialized() { return fInit; }
-  void SetInitialized(bool f) { fInit = f; }
-  void InitToDec1Mode(int numTaxa);
-  bool IsEmpty();
-  bool IsIdIn(int id);
-  int AddTaxaString(const string &str);
-  void AddTaxaStringWithId(int tid, const string &str);
-  int GetId(const string &str);
-  string GetString(const int id);
-  string ConvIdStringWithOrigTaxa(const string &strId);
-  int GetNumTaxaInMapper() const { return mapIdToStr.size(); }
-  void GetAllTaxaIds(set<int> &taxaIndices) const;
-  void GetAllTaxaStrs(set<string> &setStrs) const;
-  void Dump() const;
-  static string ExtractIdPartFromStr(const string &strIdNW);
-  static int GetIdFromStr(const string &strPart, TaxaMapper *pTMapper);
+    // utility
+    bool IsInitialized() { return fInit; }
+    void SetInitialized(bool f) { fInit = f; }
+    void InitToDec1Mode(int numTaxa);
+    bool IsEmpty();
+    bool IsIdIn(int id);
+    int AddTaxaString(const string &str);
+    void AddTaxaStringWithId(int tid, const string &str);
+    int GetId(const string &str);
+    string GetString(const int id);
+    string ConvIdStringWithOrigTaxa(const string &strId);
+    int GetNumTaxaInMapper() const { return mapIdToStr.size(); }
+    void GetAllTaxaIds(set<int> &taxaIndices) const;
+    void GetAllTaxaStrs(set<string> &setStrs) const;
+    void Dump() const;
+    static string ExtractIdPartFromStr(const string &strIdNW);
+    static int GetIdFromStr(const string &strPart, TaxaMapper *pTMapper);
 
 private:
-  map<string, int> mapStrToId;
-  map<int, string> mapIdToStr;
-  int curId;
-  bool fInit;
+    map<string, int> mapStrToId;
+    map<int, string> mapIdToStr;
+    int curId;
+    bool fInit;
 };
 
 //*****************************************************************************
 // Defintions and utilties class, not for external use.
-// Myabe I should create a separate file for these implementation-only stuff.
-// Later
+// Myabe I should create a separate file for these implementation-only stuff. Later
 // ****************************************************************************
-typedef enum { PHY_TN_DEFAULT_SHAPE = 0, PHY_TN_RECTANGLE = 1 } TREE_NODE_SHAPE;
+typedef enum
+{
+    PHY_TN_DEFAULT_SHAPE = 0,
+    PHY_TN_RECTANGLE = 1
+} TREE_NODE_SHAPE;
 
-class TreeNode {
-  friend class PhylogenyTreeBasic;
-  friend class PhylogenyTree;
+class TreeNode
+{
+    friend class PhylogenyTreeBasic;
+    friend class PhylogenyTree;
 
 public:
-  TreeNode();
-  TreeNode(int iid);
-  ~TreeNode();
+    TreeNode();
+    TreeNode(int iid);
+    ~TreeNode();
 
-  TreeNode *Copy();
-  void AddChild(TreeNode *pChild, const vector<int> &labels);
-  void AddEdgeLabelToChild(int cIndex, int lbl);
-  void RemoveChild(TreeNode *pChild);
-  void RemoveAllChildren();
-  void DetachAllChildren();
-  void DetachSelf();
-  void SetLength(double len) { lenBranchAbove = len; }
-  double GetLength() const { return lenBranchAbove; }
-  void SetLabel(const string str) { label = str; }
-  bool IsLeaf() const { return listChildren.size() == 0; }
-  void AddNodeValue(int val) { nodeValues.push_back(val); }
-  int GetChildrenNum() const { return listChildren.size(); }
-  int GetNumNodesUnder(bool fInternalOnly,
-                       bool fAddNonBinary) const; // include itself if this is
-                                                  // an internal node
-  int GetLevel() const; // level: leaf at 0, internal: longest path to some leaf
-                        // under
-  TreeNode *GetChild(int i) { return listChildren[i]; }
-  void GetDescendentLabelSet(set<int> &labelSet);
-  bool IsAncesterOf(TreeNode *pAssumedDescend, int &branchIndex);
-  int GetNumEdgesToAncestor(TreeNode *pAssumedAncestor);
-  int GetID() const { return id; }
-  void SetID(int i) { id = i; }
-  string GetLabel() const { return label; }
-  void SetUserLabel(const string &str) { labelUserProvided = str; }
-  string GetUserLabel() const { return labelUserProvided; }
-  void RemoveLabels();
-  void RemoveLabelsPar();
-  void IncEdgeLabelsBy(int offset, bool fSub);
-  int GetIntLabel() const;
-  void SetIntLabel(int lbl);
-  TREE_NODE_SHAPE GetShape() { return shape; }
-  void SetShape(TREE_NODE_SHAPE param) { shape = param; }
-  void GetEdgeLabelsAtBranch(int i, vector<int> &labels) {
-    labels = listEdgeLabels[i];
-  }
-  void GetEdgeLabelsToChild(TreeNode *pChild, vector<int> &lbls);
-  TreeNode *GetParent() { return parent; }
-  void SetParent(TreeNode *ppar) { parent = ppar; }
-  TreeNode *GetRoot() const;
-  void GetSiblings(vector<TreeNode *> &listSibs);
-  void GetAllChildren(set<TreeNode *> &setChildren) const;
-  void GetAllDescendents(set<TreeNode *> &setDescendents);
-  void GetAllLeavesUnder(set<TreeNode *> &setDescendents);
-  void GetAllLeavesIdUnder(set<int> &setDescendents);
-  void GetAllLeafLabeles(vector<string> &listLeafLabels);
-  void GetAllLeafIntLabeles(vector<int> &listLeafLabels);
-  void GetAllDistinctLeafLabeles(set<string> &setLeafLabels);
-  void GetAllDescendIntLbls(set<int> &setIntLbs);
-  void GetAllAncestors(set<TreeNode *> &listAncestors);
-  string GetShapeLabel(const set<int> &idTerms,
-                       map<int, int> &mapNodeLabel) const;
-  string GetShapeLabel(const set<int> &idTerms, bool fSort = true) const;
-  // string GetShapeLabelDistinct(const set<int> &idTerms) const;
-  string
-  GetShapeLabelNodeBrNum(map<TreeNode *, pair<int, int> > &mapNodeNumBrannches,
-                         vector<int> &listORderedLeaves);
-  TreeNode *GetMRCA(TreeNode *pOther);
-  void Order();
-  bool IsMulfurcate();
-  bool IsCheryNode() {
-    return (GetChildrenNum() == 2 && GetChild(0)->IsLeaf() == true &&
-            GetChild(1)->IsLeaf());
-  }
-  bool IsRoot() const { return parent == NULL; }
-  int GetChildIndex(TreeNode *pchild) const;
-  void Binarize(int &idToUseNext);
-  int GetMaxIdWithinSubtree() const;
-  void Dump() const;
+    TreeNode *Copy();
+    void AddChild(TreeNode *pChild, const vector<int> &labels);
+    void AddEdgeLabelToChild(int cIndex, int lbl);
+    void RemoveChild(TreeNode *pChild);
+    void RemoveAllChildren();
+    void DetachAllChildren();
+    void DetachSelf();
+    void SetLength(double len) { lenBranchAbove = len; }
+    double GetLength() const { return lenBranchAbove; }
+    void SetLabel(const string str) { label = str; }
+    bool IsLeaf() const { return listChildren.size() == 0; }
+    void AddNodeValue(int val) { nodeValues.push_back(val); }
+    int GetChildrenNum() const { return listChildren.size(); }
+    int GetNumNodesUnder(bool fInternalOnly, bool fAddNonBinary) const; // include itself if this is an internal node
+    int GetLevel() const;                                               // level: leaf at 0, internal: longest path to some leaf under
+    TreeNode *GetChild(int i) { return listChildren[i]; }
+    void GetDescendentLabelSet(set<int> &labelSet);
+    bool IsAncesterOf(TreeNode *pAssumedDescend, int &branchIndex);
+    int GetNumEdgesToAncestor(TreeNode *pAssumedAncestor);
+    int GetID() const { return id; }
+    void SetID(int i) { id = i; }
+    string GetLabel() const { return label; }
+    void SetUserLabel(const string &str) { labelUserProvided = str; }
+    string GetUserLabel() const { return labelUserProvided; }
+    void RemoveLabels();
+    void RemoveLabelsPar();
+    void IncEdgeLabelsBy(int offset, bool fSub);
+    int GetIntLabel() const;
+    void SetIntLabel(int lbl);
+    TREE_NODE_SHAPE GetShape() { return shape; }
+    void SetShape(TREE_NODE_SHAPE param) { shape = param; }
+    void GetEdgeLabelsAtBranch(int i, vector<int> &labels) { labels = listEdgeLabels[i]; }
+    void GetEdgeLabelsToChild(TreeNode *pChild, vector<int> &lbls);
+    TreeNode *GetParent() { return parent; }
+    void SetParent(TreeNode *ppar) { parent = ppar; }
+    TreeNode *GetRoot() const;
+    void GetSiblings(vector<TreeNode *> &listSibs);
+    void GetAllChildren(set<TreeNode *> &setChildren) const;
+    void GetAllDescendents(set<TreeNode *> &setDescendents);
+    void GetAllLeavesUnder(set<TreeNode *> &setDescendents);
+    void GetAllLeavesIdUnder(set<int> &setDescendents);
+    void GetAllLeafLabeles(vector<string> &listLeafLabels);
+    void GetAllLeafIntLabeles(vector<int> &listLeafLabels);
+    void GetAllDistinctLeafLabeles(set<string> &setLeafLabels);
+    void GetAllDescendIntLbls(set<int> &setIntLbs);
+    void GetAllAncestors(set<TreeNode *> &listAncestors);
+    string GetShapeLabel(const set<int> &idTerms, map<int, int> &mapNodeLabel) const;
+    string GetShapeLabel(const set<int> &idTerms, bool fSort = true) const;
+    //string GetShapeLabelDistinct(const set<int> &idTerms) const;
+    string GetShapeLabelNodeBrNum(map<TreeNode *, pair<int, int>> &mapNodeNumBrannches, vector<int> &listORderedLeaves);
+    TreeNode *GetMRCA(TreeNode *pOther);
+    void Order();
+    bool IsMulfurcate();
+    bool IsCheryNode() { return (GetChildrenNum() == 2 && GetChild(0)->IsLeaf() == true && GetChild(1)->IsLeaf()); }
+    bool IsRoot() const { return parent == NULL; }
+    int GetChildIndex(TreeNode *pchild) const;
+    void Binarize(int &idToUseNext);
+    int GetMaxIdWithinSubtree() const;
+    void Dump() const;
 
 private:
-  vector<TreeNode *> listChildren;
-  vector<vector<int> > listEdgeLabels; // What labels is used in the edge
-  TreeNode *parent;
-  int id;                 // id of this node, should be UNIQUE
-  vector<int> nodeValues; // A node can have several values, for example, nodes
-                          // labeling CAUTION: we assume node value is >=0 !!!!!
-  string label;
-  string labelUserProvided; // this ist he label before any conversion
-  TREE_NODE_SHAPE shape;
-  double lenBranchAbove;
+    vector<TreeNode *> listChildren;
+    vector<vector<int>> listEdgeLabels; // What labels is used in the edge
+    TreeNode *parent;
+    int id;                 // id of this node, should be UNIQUE
+    vector<int> nodeValues; // A node can have several values, for example, nodes labeling
+                            // CAUTION: we assume node value is >=0 !!!!!
+    string label;
+    string labelUserProvided; // this is the label before any conversion
+    TREE_NODE_SHAPE shape;
+    double lenBranchAbove;
 };
 
 // ***************************************************************************
@@ -177,230 +168,163 @@ class TreeNode {
 // ***************************************************************************
 class PhylogenyTreeBasic;
 
-class PhylogenyTreeIteratorBacktrack {
+class PhylogenyTreeIteratorBacktrack
+{
 public:
-  PhylogenyTreeIteratorBacktrack(PhylogenyTreeBasic &pt) : phyTree(pt) {}
-  void Init();
-  void Next();
-  void Back(); // do not continue going downwards (i.e. do not explore its
-               // descendent)
-  bool IsDone();
-  TreeNode *GetCurrNode();
+    PhylogenyTreeIteratorBacktrack(PhylogenyTreeBasic &pt) : phyTree(pt) {}
+    void Init();
+    void Next();
+    void Back(); // do not continue going downwards (i.e. do not explore its descendent)
+    bool IsDone();
+    TreeNode *GetCurrNode();
 
 private:
-  PhylogenyTreeBasic &phyTree;
-  stack<TreeNode *> stackNodesToExplore;
-  // TreeNode *pCurr;
+    PhylogenyTreeBasic &phyTree;
+    stack<TreeNode *> stackNodesToExplore;
+    //TreeNode *pCurr;
 };
 
-class PhylogenyTreeIterator {
+class PhylogenyTreeIterator
+{
 public:
-  PhylogenyTreeIterator(PhylogenyTreeBasic &pt) : phyTree(pt) {}
-  void Init();
-  void Next();
-  bool IsDone();
-  TreeNode *GetCurrNode();
+    PhylogenyTreeIterator(PhylogenyTreeBasic &pt) : phyTree(pt) {}
+    void Init();
+    void Next();
+    bool IsDone();
+    TreeNode *GetCurrNode();
 
 private:
-  PhylogenyTreeBasic &phyTree;
-  stack<TreeNode *> stackPostorder;
-  // TreeNode *pCurr;
+    PhylogenyTreeBasic &phyTree;
+    stack<TreeNode *> stackPostorder;
+    //TreeNode *pCurr;
 };
 
 // ***************************************************************************
 // Define phylogeny tree class
 // ***************************************************************************
 
-class PhylogenyTreeBasic {
-  friend class PhylogenyTreeIterator;
-  friend class PhylogenyTreeIteratorBacktrack;
+class PhylogenyTreeBasic
+{
+    friend class PhylogenyTreeIterator;
+    friend class PhylogenyTreeIteratorBacktrack;
 
 public:
-  PhylogenyTreeBasic(); // Empty tree
-  virtual ~PhylogenyTreeBasic();
-  PhylogenyTreeBasic *Copy();
-  void InitPostorderWalk(); // when walk, return the value of the node if any
-  TreeNode *NextPostorderWalk();
-  void OutputGML(const char *inFileName);
-  void OutputGMLNoLabel(const char *inFileName);
-  void ConsNewick(string &strNewick, bool wGridLen = false,
-                  double gridWidth = 1.0, bool fUseCurLbl = false);
-  void ConsNewickSorted(string &strNewick, bool wGridLen = false,
-                        double gridWidth = 1.0, bool fUseCurLbl = false);
-  void ConsNewickEdgeLabel(string &strNewick);
-  TreeNode *AddTreeNode(TreeNode *parNode, int id);
-  void ConsOnNewick(const string &nwString, int numLeaves = -1,
-                    bool fBottomUp = false, TaxaMapper *pTMapper = NULL);
-  void ConsOnNewickDupLabels(const string &nwString,
-                             TaxaMapper *pTMapper = NULL);
-  void ConsOnNewickEdgeLabelTree(const string &nwString);
-  int GetNumVertices() const;
-  int GetNumLeaves();
-  int GetNumInternalNodes();
-  void GetNodeParInfo(vector<int> &nodeIds, vector<int> &parPos);
-  void GetNodeParInfoNew(vector<int> &nodeIds, vector<int> &parPos);
-  bool ConsOnParPosList(const vector<int> &parPos, int numLeaves = -1,
-                        bool fBottupUpLabel = false);
-  void GetLeaveIds(set<int> &lvids);
-  void GetLeafIntLabels(set<int> &setIntLabels);
-  void GetLeavesIdsWithLabel(const string &label, set<int> &lvids);
-  void GetLeavesWithLabels(const set<string> &setLabels,
-                           set<TreeNode *> &setLvNodes);
-  void UpdateIntLabel(const vector<int> &listLabels);
-  TreeNode *GetRoot() const { return rootNode; }
-  void SetRoot(TreeNode *rn) {
-    if (rootNode != NULL)
-      delete rootNode;
-    rootNode = rn;
-  }
-  void SetRootPlain(TreeNode *rn) { rootNode = rn; }
-  void GetAllLeafLabeles(vector<string> &listLeafLabels) {
-    rootNode->GetAllLeafLabeles(listLeafLabels);
-  }
-  void GetAllLeafIntLabeles(vector<int> &listLeafLabels) {
-    rootNode->GetAllLeafIntLabeles(listLeafLabels);
-  }
-  string GetShapeLabel(const set<int> &idTerms,
-                       map<int, int> &mapNodeLabel) const {
-    return rootNode->GetShapeLabel(idTerms, mapNodeLabel);
-  }
-  string GetShapeLabel(const set<int> &idTerms, bool fSort = true) const {
-    return rootNode->GetShapeLabel(idTerms, fSort);
-  }
-  // string GetShapeLabelDistinct(const set<int> &idTerms ) const { return
-  // rootNode->GetShapeLabelDistinct(idTerms); }
-  string
-  GetShapeLabelNodeBrNum(map<TreeNode *, pair<int, int> > &mapNodeNumBrannches,
-                         vector<int> &listORderedLeaves);
-  bool TestIsomorphic(PhylogenyTreeBasic &treeOther,
-                      map<TreeNode *, TreeNode *> &mapOldNodeToNew) const;
-  void Reroot(TreeNode *pRootDesc); // pRootDesc: the node in the current tree
-                                    // (must be, but we will not check) which
-                                    // will be root's descendent
-  void GetAllLeafNodes(vector<TreeNode *> &listLeafNodes) const;
-  void GetAllNodes(vector<TreeNode *> &listLeafNodes) const;
-  void Order() { rootNode->Order(); }
-  bool IsMulfurcate() { return GetRoot()->IsMulfurcate(); }
-  void CleanNonLabeledLeaves();
-  void RemoveNode(TreeNode *pn);
-  void RemoveNodeKeepChildren(TreeNode *pn);
-  void RemoveDegreeOneNodeAt(TreeNode *pn);
-  void RemoveDegreeOneNodes();
-  void RemoveEdgeLabels();
-  void RemoveEdgeLabelsToLeaves();
-  void IncEdgeLabelsBy(int offset);
-  void ConsPhyTreeFromClusters(const set<set<int> > &setClusters);
-  static void RemoveDescendentsFrom(set<TreeNode *> &setTreeNodes);
-  void FindCladeOfSubsetLeaves(const set<TreeNode *> &setLeaves,
-                               set<set<TreeNode *> > &setSubtreeClades);
-  void FindCladeOfSubsetLeavesExact(const set<TreeNode *> &setLeaves,
-                                    set<set<TreeNode *> > &setSubtreeClades);
-  static void
-  GroupLeavesToSubtrees(const set<TreeNode *> &setLeaves,
-                        const set<set<TreeNode *> > &cladeNodesToProc,
-                        set<set<TreeNode *> > &setSubtreeClades);
-  static void
-  GroupLeavesToSubtreesSamePar(const set<TreeNode *> &setLeaves,
-                               const set<set<TreeNode *> > &cladeNodesToProc,
-                               set<set<TreeNode *> > &setSubtreeClades);
-  static void GroupNodesWithCommonPars(
-      const set<TreeNode *> &setNodes,
-      map<TreeNode *, set<TreeNode *> > &mapNodesWithSamePar);
-  void GetAllClades(set<set<int> > &setClades);
-  void GetAllCladesList(vector<set<int> > &listClades);
-  void GetAllCladesById(set<set<int> > &setClades);
-  void GetAllCladeNodess(set<set<TreeNode *> > &setClades);
-  void GetAllCladeGroupsIntLabel(
-      multiset<multiset<multiset<int> > > &setCladeGroupsDupLabels,
-      multiset<int> &rootClade);
-  TreeNode *GetSubtreeRootForLeaves(const set<TreeNode *> &setLvNodes);
-  void GetSubtreesWithMaxSize(set<TreeNode *> &setSTRoots,
-                              int maxSzSubtree) const;
-  void GetMaxSubtrees(set<TreeNode *> &setSTRootsIdents);
-  void MakeSubtreeUnrefined(TreeNode *pSubtree);
-  void Binarize();
-  void CreatePhyTreeFromLeavesWithLabels(const set<string> &setLeafLabels,
-                                         PhylogenyTreeBasic &treeToProc,
-                                         bool fUseOldTaxonName);
-  void AssignLeafLabels(const map<int, string> &mapLeafLbls);
-  void ReassignLeafLabels(const map<string, string> &mapLeafLbls);
-  void SetUserLabelToCurrLabels();
-  void SetLabelsToCurrUserLabels();
-  int GetMaxDegree() const;
-  static bool GetSiblingsPairFrom(const set<TreeNode *> &setNodesToChoose,
-                                  pair<TreeNode *, TreeNode *> &pairSibs);
-  static bool GetSiblingsNodesFrom(const set<TreeNode *> &setNodesToChoose,
-                                   set<TreeNode *> &setSibs);
-  static void FindAllLabelsInSubtrees(const set<TreeNode *> &setSTRoots,
-                                      set<string> &setLabels);
-  static void
-  FindDescendentsOfNodeWithin(TreeNode *pAnc,
-                              const set<TreeNode *> &setNodesToChoose,
-                              set<TreeNode *> &setDescendents);
-  void Dump() const;
+    PhylogenyTreeBasic(); // Empty tree
+    virtual ~PhylogenyTreeBasic();
+    PhylogenyTreeBasic *Copy();
+    void InitPostorderWalk(); // when walk, return the value of the node if any
+    TreeNode *NextPostorderWalk();
+    void OutputGML(const char *inFileName);
+    void OutputGMLNoLabel(const char *inFileName);
+    void ConsNewick(string &strNewick, bool wGridLen = false, double gridWidth = 1.0, bool fUseCurLbl = false);
+    void ConsNewickSorted(string &strNewick, bool wGridLen = false, double gridWidth = 1.0, bool fUseCurLbl = false);
+    void ConsNewickEdgeLabel(string &strNewick);
+    TreeNode *AddTreeNode(TreeNode *parNode, int id);
+    void ConsOnNewick(const string &nwString, int numLeaves = -1, bool fBottomUp = false, TaxaMapper *pTMapper = NULL);
+    void ConsOnNewickDupLabels(const string &nwString, TaxaMapper *pTMapper = NULL);
+    void ConsOnNewickEdgeLabelTree(const string &nwString);
+    int GetNumVertices() const;
+    int GetNumLeaves();
+    int GetNumInternalNodes();
+    void GetNodeParInfo(vector<int> &nodeIds, vector<int> &parPos);
+    void GetNodeParInfoNew(vector<int> &nodeIds, vector<int> &parPos);
+    bool ConsOnParPosList(const vector<int> &parPos, int numLeaves = -1, bool fBottupUpLabel = false);
+    void GetLeaveIds(set<int> &lvids);
+    void GetLeafIntLabels(set<int> &setIntLabels);
+    void GetLeavesIdsWithLabel(const string &label, set<int> &lvids);
+    void GetLeavesWithLabels(const set<string> &setLabels, set<TreeNode *> &setLvNodes);
+    void UpdateIntLabel(const vector<int> &listLabels);
+    TreeNode *GetRoot() const { return rootNode; }
+    void SetRoot(TreeNode *rn)
+    {
+        if (rootNode != NULL)
+            delete rootNode;
+        rootNode = rn;
+    }
+    void SetRootPlain(TreeNode *rn) { rootNode = rn; }
+    void GetAllLeafLabeles(vector<string> &listLeafLabels) { rootNode->GetAllLeafLabeles(listLeafLabels); }
+    void GetAllLeafIntLabeles(vector<int> &listLeafLabels) { rootNode->GetAllLeafIntLabeles(listLeafLabels); }
+    string GetShapeLabel(const set<int> &idTerms, map<int, int> &mapNodeLabel) const { return rootNode->GetShapeLabel(idTerms, mapNodeLabel); }
+    string GetShapeLabel(const set<int> &idTerms, bool fSort = true) const { return rootNode->GetShapeLabel(idTerms, fSort); }
+    //string GetShapeLabelDistinct(const set<int> &idTerms ) const { return rootNode->GetShapeLabelDistinct(idTerms); }
+    string GetShapeLabelNodeBrNum(map<TreeNode *, pair<int, int>> &mapNodeNumBrannches, vector<int> &listORderedLeaves);
+    bool TestIsomorphic(PhylogenyTreeBasic &treeOther, map<TreeNode *, TreeNode *> &mapOldNodeToNew) const;
+    void Reroot(TreeNode *pRootDesc); // pRootDesc: the node in the current tree (must be, but we will not check) which will be root's descendent
+    void GetAllLeafNodes(vector<TreeNode *> &listLeafNodes) const;
+    void GetAllNodes(vector<TreeNode *> &listLeafNodes) const;
+    void Order() { rootNode->Order(); }
+    bool IsMulfurcate() { return GetRoot()->IsMulfurcate(); }
+    void CleanNonLabeledLeaves();
+    void RemoveNode(TreeNode *pn);
+    void RemoveNodeKeepChildren(TreeNode *pn);
+    void RemoveDegreeOneNodeAt(TreeNode *pn);
+    void RemoveDegreeOneNodes();
+    void RemoveEdgeLabels();
+    void RemoveEdgeLabelsToLeaves();
+    void IncEdgeLabelsBy(int offset);
+    void ConsPhyTreeFromClusters(const set<set<int>> &setClusters);
+    static void RemoveDescendentsFrom(set<TreeNode *> &setTreeNodes);
+    void FindCladeOfSubsetLeaves(const set<TreeNode *> &setLeaves, set<set<TreeNode *>> &setSubtreeClades);
+    void FindCladeOfSubsetLeavesExact(const set<TreeNode *> &setLeaves, set<set<TreeNode *>> &setSubtreeClades);
+    static void GroupLeavesToSubtrees(const set<TreeNode *> &setLeaves, const set<set<TreeNode *>> &cladeNodesToProc, set<set<TreeNode *>> &setSubtreeClades);
+    static void GroupLeavesToSubtreesSamePar(const set<TreeNode *> &setLeaves, const set<set<TreeNode *>> &cladeNodesToProc, set<set<TreeNode *>> &setSubtreeClades);
+    static void GroupNodesWithCommonPars(const set<TreeNode *> &setNodes, map<TreeNode *, set<TreeNode *>> &mapNodesWithSamePar);
+    void GetAllClades(set<set<int>> &setClades);
+    void GetAllCladesList(vector<set<int>> &listClades);
+    void GetAllCladesById(set<set<int>> &setClades);
+    void GetAllCladeNodess(set<set<TreeNode *>> &setClades);
+    void GetAllCladeGroupsIntLabel(multiset<multiset<multiset<int>>> &setCladeGroupsDupLabels, multiset<int> &rootClade);
+    TreeNode *GetSubtreeRootForLeaves(const set<TreeNode *> &setLvNodes);
+    void GetSubtreesWithMaxSize(set<TreeNode *> &setSTRoots, int maxSzSubtree) const;
+    void GetMaxSubtrees(set<TreeNode *> &setSTRootsIdents);
+    void MakeSubtreeUnrefined(TreeNode *pSubtree);
+    void Binarize();
+    void CreatePhyTreeFromLeavesWithLabels(const set<string> &setLeafLabels, PhylogenyTreeBasic &treeToProc, bool fUseOldTaxonName);
+    void AssignLeafLabels(const map<int, string> &mapLeafLbls);
+    void ReassignLeafLabels(const map<string, string> &mapLeafLbls);
+    void SetUserLabelToCurrLabels();
+    void SetLabelsToCurrUserLabels();
+    int GetMaxDegree() const;
+    static bool GetSiblingsPairFrom(const set<TreeNode *> &setNodesToChoose, pair<TreeNode *, TreeNode *> &pairSibs);
+    static bool GetSiblingsNodesFrom(const set<TreeNode *> &setNodesToChoose, set<TreeNode *> &setSibs);
+    static void FindAllLabelsInSubtrees(const set<TreeNode *> &setSTRoots, set<string> &setLabels);
+    static void FindDescendentsOfNodeWithin(TreeNode *pAnc, const set<TreeNode *> &setNodesToChoose, set<TreeNode *> &setDescendents);
+    void Dump() const;
 
 protected:
-  void PostOrderPushStack(TreeNode *treeNode,
-                          stack<TreeNode *> &stackPostorder);
-  string ConsNewickTreeNode(TreeNode *pNode, bool wGridLen = false,
-                            double gridWidth = 1.0, bool fUseCurLbl = false,
-                            bool fSort = false, bool fOutEdgeLabel = false);
-  TreeNode *ConsOnNewickSubtree(const string &nwStringPart, int &leafId,
-                                int &invId, int numLeaves = -1,
-                                bool fBottomUp = false,
-                                TaxaMapper *pTMapper = NULL);
-  bool ConvParPosToNewick(const vector<int> &parPos, string &strNewick);
-  void ConvParPosToNewickSubtree(int nodeInd, const vector<int> &parPos,
-                                 string &strNewick);
-  TreeNode *ConsOnNewickSubtreeDupLabels(const string &nwStringPart, int &invId,
-                                         int &leafId,
-                                         TaxaMapper *pTMapper = NULL);
-  // void GetSubtreesWithMaxSizeExcludeTaxa(set<TreeNode *> &setSTRoots, int
-  // maxSzSubtree, const set<string> &setTaxaAllowed) const; int GetIdFromStr(
-  // const string &strPart, TaxaMapper *pTMapper );
+    void PostOrderPushStack(TreeNode *treeNode, stack<TreeNode *> &stackPostorder);
+    string ConsNewickTreeNode(TreeNode *pNode, bool wGridLen = false, double gridWidth = 1.0, bool fUseCurLbl = false, bool fSort = false, bool fOutEdgeLabel = false);
+    TreeNode *ConsOnNewickSubtree(const string &nwStringPart, int &leafId, int &invId, int numLeaves = -1, bool fBottomUp = false, TaxaMapper *pTMapper = NULL);
+    bool ConvParPosToNewick(const vector<int> &parPos, string &strNewick);
+    void ConvParPosToNewickSubtree(int nodeInd, const vector<int> &parPos, string &strNewick);
+    TreeNode *ConsOnNewickSubtreeDupLabels(const string &nwStringPart, int &invId, int &leafId, TaxaMapper *pTMapper = NULL);
+    //void GetSubtreesWithMaxSizeExcludeTaxa(set<TreeNode *> &setSTRoots, int maxSzSubtree, const set<string> &setTaxaAllowed) const;
+    //int GetIdFromStr( const string &strPart, TaxaMapper *pTMapper );
 
-  // Privaet data members
-  TreeNode *rootNode;
+    // Private data members
+    TreeNode *rootNode;
 
-  // Postoder traversal
-  stack<TreeNode *> stackPostorder;
-  int numLeaves;
+    // Postorder traversal
+    stack<TreeNode *> stackPostorder;
+    int numLeaves;
 };
 
 //*****************************************************************************
 string GetStringFromId(int id);
-int GetNewickNumLeaves(const string &strNewick, char chSepLeft = '(',
-                       char chSepRight = ')', char midSep = ',');
-bool GetTripleType(TreeNode *pn1, TreeNode *pn2, TreeNode *pn3,
-                   pair<pair<TreeNode *, TreeNode *>, TreeNode *> &triple);
-bool ReadinPhyloTreesNewick(ifstream &inFile, int numLeaves,
-                            vector<PhylogenyTreeBasic *> &treePtrList,
-                            TaxaMapper *pTMapper = NULL);
-void InitRandomTree(PhylogenyTreeBasic &treeToInit, int numTaxa,
-                    int rndSeed = -1);
-void CreatePhyTreeWithRootedSplits(PhylogenyTreeBasic &treeToProc, int numTaxa,
-                                   const set<set<int> > &setGivenSplits);
-void DumpAllSubtreesWithTaxaSize(
-    const vector<PhylogenyTreeBasic *> &listPtrGTrees, int numTaxonSubtree,
-    const char *fileNameOut);
-void DumpAllSubtreesWithBoundedSize(
-    const vector<PhylogenyTreeBasic *> &listPtrGTrees, int maxSzSubtree,
-    int maxIdentSubtreeSz, const char *fileNameOut);
-PhylogenyTreeBasic *ConsPhyTreeShrinkIdentSubtrees(PhylogenyTreeBasic *ptreeIn,
-                                                   int maxIdentSubtreeSz,
-                                                   bool fIdConsecutive = false);
-void ChangebackLeafLabelForTreeWithZeroBaseId(PhylogenyTreeBasic *ptree,
-                                              TaxaMapper *pTMapper);
-void ChangeLeafIntLabelOfTree(PhylogenyTreeBasic &treeToChange,
-                              const map<int, int> &mapOldIntLblToNewIntLbl,
-                              bool fSetUserLblToo = false);
+int GetNewickNumLeaves(const string &strNewick, char chSepLeft = '(', char chSepRight = ')', char midSep = ',');
+bool GetTripleType(TreeNode *pn1, TreeNode *pn2, TreeNode *pn3, pair<pair<TreeNode *, TreeNode *>, TreeNode *> &triple);
+bool ReadinPhyloTreesNewick(ifstream &inFile, int numLeaves, vector<PhylogenyTreeBasic *> &treePtrList, TaxaMapper *pTMapper = NULL);
+void InitRandomTree(PhylogenyTreeBasic &treeToInit, int numTaxa, int rndSeed = -1);
+void CreatePhyTreeWithRootedSplits(PhylogenyTreeBasic &treeToProc, int numTaxa, const set<set<int>> &setGivenSplits);
+void DumpAllSubtreesWithTaxaSize(const vector<PhylogenyTreeBasic *> &listPtrGTrees, int numTaxonSubtree, const char *fileNameOut);
+void DumpAllSubtreesWithBoundedSize(const vector<PhylogenyTreeBasic *> &listPtrGTrees, int maxSzSubtree, int maxIdentSubtreeSz, const char *fileNameOut);
+PhylogenyTreeBasic *ConsPhyTreeShrinkIdentSubtrees(PhylogenyTreeBasic *ptreeIn, int maxIdentSubtreeSz, bool fIdConsecutive = false);
+void ChangebackLeafLabelForTreeWithZeroBaseId(PhylogenyTreeBasic *ptree, TaxaMapper *pTMapper);
+void ChangeLeafIntLabelOfTree(PhylogenyTreeBasic &treeToChange, const map<int, int> &mapOldIntLblToNewIntLbl, bool fSetUserLblToo = false);
 void AssignConsecutiveIdsForTree(PhylogenyTreeBasic &treeToChange);
-bool ConvPhyloTreesToZeroBasedId(vector<PhylogenyTreeBasic *> &treePtrList,
-                                 TaxaMapper *pTMapper);
-void RandTrimLeavesFromTree(PhylogenyTreeBasic *ptreeToTrim,
-                            int numLeavesRemain);
-PhylogenyTreeBasic *ConsPhyTreeSubsetTaxa(PhylogenyTreeBasic *ptreeIn,
-                                          const set<int> &setTaxaKept);
+bool ConvPhyloTreesToZeroBasedId(vector<PhylogenyTreeBasic *> &treePtrList, TaxaMapper *pTMapper);
+void RandTrimLeavesFromTree(PhylogenyTreeBasic *ptreeToTrim, int numLeavesRemain);
+PhylogenyTreeBasic *ConsPhyTreeSubsetTaxa(PhylogenyTreeBasic *ptreeIn, const set<int> &setTaxaKept);
 string ConsEdgeLabeTree(const string &strNWWithLabels);
 
 #endif // PHYLOGENY_TREE_H
diff --git a/trisicell/external/scistree/RBT.cpp b/trisicell/external/scistree/RBT.cpp
index 92b0e5f..535471e 100644
--- a/trisicell/external/scistree/RBT.cpp
+++ b/trisicell/external/scistree/RBT.cpp
@@ -1,1196 +1,730 @@
 #include "RBT.h"
 
-//////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
 
 // useful stuff
-int GetNumRBT(int nlv) {
-  int res = 1;
-  for (int nr = 2; nr < nlv; ++nr) {
-    //
-    res *= 2 * nr - 1;
-  }
-  return res;
+int GetNumRBT(int nlv)
+{
+    int res = 1;
+    for (int nr = 2; nr < nlv; ++nr)
+    {
+        //
+        res *= 2 * nr - 1;
+    }
+    return res;
 }
 
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
 
 int RBTNode ::idNodeNextToUse = 20000;
 
-RBTNode ::RBTNode(RBTNode *pLeftParam, RBTNode *pRightParam)
-    : pLeft(pLeftParam), pRight(pRightParam), pParent(NULL) {
-  YW_ASSERT_INFO(pLeft != NULL && pRight != NULL, "Can not be NULL");
+RBTNode ::RBTNode(RBTNode *pLeftParam, RBTNode *pRightParam) : pLeft(pLeftParam), pRight(pRightParam), pParent(NULL)
+{
+    YW_ASSERT_INFO(pLeft != NULL && pRight != NULL, "Can not be NULL");
 
-  // ensure children's parent are set
-  pLeft->SetParent(this);
-  pRight->SetParent(this);
-  lvid = idNodeNextToUse++;
-  SetHeight(-1.0);
+    // ensure children's parent are set
+    pLeft->SetParent(this);
+    pRight->SetParent(this);
+    lvid = idNodeNextToUse++;
+    SetHeight(-1.0);
 }
 
 // operation
-RBTNode *RBTNode ::CopySubTree() {
-  // copy entire subtree under it
-  if (IsLeaf() == false) {
-    // copy left/right subtrees
-    RBTNode *pLT = pLeft->CopySubTree();
-    RBTNode *pRT = pRight->CopySubTree();
-    RBTNode *pNewNode = new RBTNode(pLT, pRT);
-    // cout << "copy a internal node " <<", newnode = " << (int) pNewNode <<
-    // endl;
-    return pNewNode;
-  } else {
-    // copy self only
-    RBTNode *pNewNode = new RBTNode(this->lvid);
-    pNewNode->SetHeight(-1.0);
-    // cout << "copy a leaf node "<< ", lvid = " << lvid  <<", newnode = " <<
-    // (int) pNewNode << endl;
-    return pNewNode;
-  }
-}
-
-void RBTNode ::AddToLeftEdge(int lvidParam) {
-  // ensure this is not a leaf
-  YW_ASSERT_INFO(IsLeaf() == false, "Can not be a leaf");
-
-  RBTNode *pInternal = pLeft->AddSibling(lvidParam);
-  pInternal->SetParent(this);
-  this->SetLeftChild(pInternal);
-}
-
-void RBTNode ::AddToRightEdge(int lvidParam) {
-  YW_ASSERT_INFO(IsLeaf() == false, "Can not be a leaf");
-  RBTNode *pInternal = pRight->AddSibling(lvidParam);
-  pInternal->SetParent(this);
-  this->SetRightChild(pInternal);
-}
-
-RBTNode *RBTNode ::AddSibling(int lvidParam) {
-  // create a new sibling and a root
-  RBTNode *pOther = new RBTNode(lvidParam);
-  // cout << "Adding a leaf " << (int) pOther << endl;
-  int existId = GetMinLeaveId();
-  if (existId < lvidParam) {
-    //
-    RBTNode *pParent = new RBTNode(this, pOther);
-    // cout << "Adding a node " << (int) pParent << endl;
-    return pParent;
-  } else {
-    RBTNode *pParent = new RBTNode(pOther, this);
-    // cout << "Adding a node " << (int) pParent << endl;
-    return pParent;
-  }
-}
-
-void RBTNode ::DetachSubtree() {
-  if (this->pParent == NULL) {
-    // nothing needs to be done, since we are trying to sepearet the WHOLE tree
-    // it does not make sense....
-    return;
-  }
-
-  // detach this node (and its descendents) from the rest of the tree
-  // note this include free up the current parent
-  // this function needs to mantain the coherance of the other tree
-
-  // First seperate the current parent
-  RBTNode *pOther = this->pParent->GetLeftChild();
-  if (this->IsLeftChild() == true) {
-    pOther = this->pParent->GetRightChild();
-  }
-  pOther->SetParent(this->pParent->GetParent());
-  if (this->pParent->GetParent() != NULL) {
-    if (this->pParent->IsLeftChild() == true) {
-      this->pParent->GetParent()->SetLeftChild(pOther);
-    } else {
-      this->pParent->GetParent()->SetRightChild(pOther);
-    }
-  }
-  this->pParent->SetLeftChild(NULL);
-  this->pParent->SetRightChild(NULL);
-  delete this->pParent;
-
-  // need to readjust the tree since the remainig tree may have problem
-  // with left/right ordering
-  pOther->AdjustLRChildUpwards();
-
-  // Finally set the current node's par to emtpy (meaning detached)
-  this->pParent = NULL;
-}
-
-RBTNode *RBTNode ::AttachSubtree(RBTNode *pSib) {
-  YW_ASSERT_INFO(pSib != NULL, "Fail 2.0");
-
-  // reattach the subtree with its sibling
-  // we need to create a new node (which will be returned)
-  // this new node could be the new root
-  bool fLeftOfSib = true;
-  if (this->GetMinLeaveId() > pSib->GetMinLeaveId()) {
-    fLeftOfSib = false;
-  }
-  // cout << "psib = " << (int) pSib << endl;
-  // save the original par of psib
-  RBTNode *pParSib = pSib->GetParent();
-
-  RBTNode *pPar;
-  if (fLeftOfSib == true) {
-    pPar = new RBTNode(this, pSib);
-  } else {
-    pPar = new RBTNode(pSib, this);
-  }
-  pPar->SetParent(pParSib);
-  // cout << "After set parent, create a new node = " << (int) pPar << endl;
-  if (pParSib != NULL) {
-    if (pParSib->GetLeftChild() == pSib) {
-      // cout << "set " << (int) pParSib << " left child to " << (int) pPar <<
-      // endl;
-      pParSib->SetLeftChild(pPar);
-    } else {
-      // cout <<"set " << (int) pParSib << " right child to " << (int) pPar <<
-      // endl;
-      pParSib->SetRightChild(pPar);
-    }
-  }
-
-  // make sure tree is in right topology
-  AdjustLRChildUpwards();
-
-  // cout << "exit from attachsubtree..\n";
-  return pPar;
-}
-
-RBTNode *RBTNode ::FindLeaf(int lvidParam, int &ponid) {
-  // IMPORTANT, in traversal,
-  // assume post-order search, and return the how many nodes visited so far
-  // Note, ponid should be initialized upon entry (to -1)
-
-  if (IsLeaf() == false) {
-    RBTNode *plv = pLeft->FindLeaf(lvidParam, ponid);
-    if (plv != NULL) {
-      return plv;
-    }
-    plv = pRight->FindLeaf(lvidParam, ponid);
-    if (plv != NULL) {
-      return plv;
-    }
-  }
-  // otherwise, increment counter
-  ponid++;
-  if (IsLeaf() == true) {
-    // cout << "visiting leaf = " << this->lvid << ", to search for " <<
-    // lvidParam << endl;
-    if (this->lvid == lvidParam) {
-      return this;
-    } else {
-      return NULL;
-    }
-  }
-  return NULL;
-}
-
-bool RBTNode ::RemoveLeafSelf() {
-  // only remove self if it is a leaf
-  if (IsLeaf() == false) {
-    return false;
-  }
-  // remove this node
-  if (this->pParent != NULL) {
-    // need to rearrange the tree to ensure binary shape
+RBTNode *RBTNode ::CopySubTree()
+{
+    // copy entire subtree under it
+    if (IsLeaf() == false)
+    {
+        // copy left/right subtrees
+        RBTNode *pLT = pLeft->CopySubTree();
+        RBTNode *pRT = pRight->CopySubTree();
+        RBTNode *pNewNode = new RBTNode(pLT, pRT);
+        //cout << "copy a internal node " <<", newnode = " << (int) pNewNode << endl;
+        return pNewNode;
+    }
+    else
+    {
+        // copy self only
+        RBTNode *pNewNode = new RBTNode(this->lvid);
+        pNewNode->SetHeight(-1.0);
+        //cout << "copy a leaf node "<< ", lvid = " << lvid  <<", newnode = " << (int) pNewNode << endl;
+        return pNewNode;
+    }
+}
+
+void RBTNode ::AddToLeftEdge(int lvidParam)
+{
+    // ensure this is not a leaf
+    YW_ASSERT_INFO(IsLeaf() == false, "Can not be a leaf");
+
+    RBTNode *pInternal = pLeft->AddSibling(lvidParam);
+    pInternal->SetParent(this);
+    this->SetLeftChild(pInternal);
+}
+
+void RBTNode ::AddToRightEdge(int lvidParam)
+{
+    YW_ASSERT_INFO(IsLeaf() == false, "Can not be a leaf");
+    RBTNode *pInternal = pRight->AddSibling(lvidParam);
+    pInternal->SetParent(this);
+    this->SetRightChild(pInternal);
+}
+
+RBTNode *RBTNode ::AddSibling(int lvidParam)
+{
+    // create a new sibling and a root
+    RBTNode *pOther = new RBTNode(lvidParam);
+    //cout << "Adding a leaf " << (int) pOther << endl;
+    int existId = GetMinLeaveId();
+    if (existId < lvidParam)
+    {
+        //
+        RBTNode *pParent = new RBTNode(this, pOther);
+        //cout << "Adding a node " << (int) pParent << endl;
+        return pParent;
+    }
+    else
+    {
+        RBTNode *pParent = new RBTNode(pOther, this);
+        //cout << "Adding a node " << (int) pParent << endl;
+        return pParent;
+    }
+}
+
+void RBTNode ::DetachSubtree()
+{
+    if (this->pParent == NULL)
+    {
+        // nothing needs to be done, since we are trying to sepearet the WHOLE tree
+        // it does not make sense....
+        return;
+    }
+
+    // detach this node (and its descendents) from the rest of the tree
+    // note this include free up the current parent
+    // this function needs to mantain the coherance of the other tree
+
+    // First seperate the current parent
     RBTNode *pOther = this->pParent->GetLeftChild();
-    if (IsLeftChild() == true) {
-      // cout << "Switch to the right\n";
-      pOther = this->pParent->GetRightChild();
+    if (this->IsLeftChild() == true)
+    {
+        pOther = this->pParent->GetRightChild();
     }
-    // skip the parent
     pOther->SetParent(this->pParent->GetParent());
-    // cout << "after getparent\n";
-    if (this->pParent->GetParent() != NULL) {
-      // cout << "Still need to set parent's parent\n";
-      // also need to ensure the proper pointer
-      if (pParent->IsLeftChild() == true) {
-        pParent->GetParent()->SetLeftChild(pOther);
-      } else {
-        pParent->GetParent()->SetRightChild(pOther);
-      }
-    }
-    // cout << "delete the old parent\n";
-    // free up the parent
-    pParent->SetLeftChild(NULL);
-    pParent->SetRightChild(NULL);
+    if (this->pParent->GetParent() != NULL)
+    {
+        if (this->pParent->IsLeftChild() == true)
+        {
+            this->pParent->GetParent()->SetLeftChild(pOther);
+        }
+        else
+        {
+            this->pParent->GetParent()->SetRightChild(pOther);
+        }
+    }
+    this->pParent->SetLeftChild(NULL);
+    this->pParent->SetRightChild(NULL);
     delete this->pParent;
-    // delete this;
 
-    // make sure the left is ALWAYS smaller than RIGHT
-    // BUT SINCE WE ARE REMOVING IN DESCENDING ORDER
-    // so it does not matter here. But need to be fixed
-    // TBD
+    // need to readjust the tree since the remainig tree may have problem
+    // with left/right ordering
+    pOther->AdjustLRChildUpwards();
+
+    // Finally set the current node's par to emtpy (meaning detached)
+    this->pParent = NULL;
+}
+
+RBTNode *RBTNode ::AttachSubtree(RBTNode *pSib)
+{
+    YW_ASSERT_INFO(pSib != NULL, "Fail 2.0");
+
+    // reattach the subtree with its sibling
+    // we need to create a new node (which will be returned)
+    // this new node could be the new root
+    bool fLeftOfSib = true;
+    if (this->GetMinLeaveId() > pSib->GetMinLeaveId())
+    {
+        fLeftOfSib = false;
+    }
+    //cout << "psib = " << (int) pSib << endl;
+    // save the original par of psib
+    RBTNode *pParSib = pSib->GetParent();
+
+    RBTNode *pPar;
+    if (fLeftOfSib == true)
+    {
+        pPar = new RBTNode(this, pSib);
+    }
+    else
+    {
+        pPar = new RBTNode(pSib, this);
+    }
+    pPar->SetParent(pParSib);
+    //cout << "After set parent, create a new node = " << (int) pPar << endl;
+    if (pParSib != NULL)
+    {
+        if (pParSib->GetLeftChild() == pSib)
+        {
+            //cout << "set " << (int) pParSib << " left child to " << (int) pPar << endl;
+            pParSib->SetLeftChild(pPar);
+        }
+        else
+        {
+            //cout <<"set " << (int) pParSib << " right child to " << (int) pPar << endl;
+            pParSib->SetRightChild(pPar);
+        }
+    }
 
-  } else {
-    // delete this;
-  }
+    // make sure tree is in right topology
+    AdjustLRChildUpwards();
 
-  // cout << "done\n";
-  return true;
+    //cout << "exit from attachsubtree..\n";
+    return pPar;
 }
 
-// access
-int RBTNode ::GetMinLeaveId() {
-  YW_ASSERT_INFO(IsLeaf() == true || (pLeft != NULL && pRight != NULL),
-                 "Children wrong.");
-  if (IsLeaf() == true) {
-    return GetLeafId();
-  } else {
-    int lid = pLeft->GetMinLeaveId();
-    int rid = pRight->GetMinLeaveId();
-    if (lid < rid) {
-      return lid;
-    } else {
-      return rid;
-    }
-  }
-}
-
-RBTNode *RBTNode ::GetLeftMostChild() {
-  RBTNode *pcur = this;
-  while (pcur->IsLeaf() == false) {
-    pcur = pcur->GetLeftChild();
-  }
-  return pcur;
-}
-
-RBTNode *RBTNode ::GetSibling() {
-  if (GetParent() == NULL) {
+RBTNode *RBTNode ::FindLeaf(int lvidParam, int &ponid)
+{
+    // IMPORTANT, in traversal,
+    // assume post-order search, and return the how many nodes visited so far
+    // Note, ponid should be initialized upon entry (to -1)
+
+    if (IsLeaf() == false)
+    {
+        RBTNode *plv = pLeft->FindLeaf(lvidParam, ponid);
+        if (plv != NULL)
+        {
+            return plv;
+        }
+        plv = pRight->FindLeaf(lvidParam, ponid);
+        if (plv != NULL)
+        {
+            return plv;
+        }
+    }
+    // otherwise, increment counter
+    ponid++;
+    if (IsLeaf() == true)
+    {
+        //cout << "visiting leaf = " << this->lvid << ", to search for " << lvidParam << endl;
+        if (this->lvid == lvidParam)
+        {
+            return this;
+        }
+        else
+        {
+            return NULL;
+        }
+    }
     return NULL;
-  } else {
-    if (IsLeftChild() == true) {
-      return GetParent()->GetRightChild();
-    } else {
-      return GetParent()->GetLeftChild();
-    }
-  }
-}
-
-bool RBTNode ::IsLeaf() const { return pLeft == NULL && pRight == NULL; }
-
-int RBTNode ::GetNumLeavesUnder() {
-  // cout << "current node = " << (int) this << endl;
-  YW_ASSERT_INFO(IsLeaf() == true || (pLeft != NULL && pRight != NULL),
-                 "Children wrong.");
-  if (IsLeaf() == true) {
-    return 1;
-  } else {
-    return pLeft->GetNumLeavesUnder() + pRight->GetNumLeavesUnder();
-  }
-}
-
-void RBTNode ::GetLeaves(set<int> &lvs) {
-  // cout << "Get leaves so far for node = " << (int) this << ":  ";
-  // DumpIntSet( lvs );
-  YW_ASSERT_INFO(IsLeaf() == true || (pLeft != NULL && pRight != NULL),
-                 "Children wrong.");
-  if (IsLeaf() == true) {
-    lvs.insert(this->lvid);
-  } else {
-    pLeft->GetLeaves(lvs);
-    pRight->GetLeaves(lvs);
-  }
-}
-
-bool RBTNode ::IsLeftChild() {
-  // if it has no parent, consider left
-  if (this->pParent == NULL) {
-    return true;
-  }
-  if (this->pParent->GetLeftChild() == this) {
-    return true;
-  } else {
-    return false;
-  }
 }
 
-// memory. free recursively
-void RBTNode ::Clear() {
-  // NOTE: the current node is not deleted!!!!
-  // recursively delete
-  if (pLeft != NULL) {
-    pLeft->Clear();
-    delete pLeft;
-    pLeft = NULL;
-  }
-  if (pRight != NULL) {
-    pRight->Clear();
-    delete pRight;
-    pRight = NULL;
-  }
-
-  // delete this;
-}
-
-void RBTNode ::AdjustLRChildUpwards() {
-  // this function re-adjust the left/right subtrees, starting
-  // from the current node, and upwards the tree
-  // This is because when something is removed, we have to
-  // make sure the tree topology is still what is like before:
-  // the left subtree must have its min-leaf lower than right
-  // subtree
-  RBTNode *pcur = this;
-  while (pcur != NULL) {
-    //
-    if (pcur->IsLeaf() == false && pcur->GetLeftChild()->GetMinLeaveId() >
-                                       pcur->GetRightChild()->GetMinLeaveId()) {
-      // switch it
-      RBTNode *ptmp = pcur->GetLeftChild();
-      pcur->SetLeftChild(pcur->GetRightChild());
-      pcur->SetRightChild(ptmp);
-    }
-
-    // trace upwards
-    pcur = pcur->GetParent();
-  }
-}
-
-void RBTNode ::Dump() const {
-  // print leaf only
-  // this is simply do a post-order traversal
-  if (IsLeaf() == true) {
-    cout << " " << this->lvid;
-    if (GetHeight() >= 0) {
-      cout << "[" << GetHeight() << "]";
-    }
-    cout << " ";
-  } else {
-    cout << "( ";
-    this->GetLeftChild()->Dump();
-    this->GetRightChild()->Dump();
-    cout << " )";
-    if (GetHeight() >= 0) {
-      cout << "[" << GetHeight() << "]";
-    }
-    cout << " ";
-  }
-}
-
-string RBTNode ::GetNewick() const {
-  // if leaf, fill in the leaf id
-  if (IsLeaf() == true) {
-    char buf[100];
-    sprintf(buf, "%d", this->lvid);
-    return string(buf);
-  } else {
-    string s1 = this->GetLeftChild()->GetNewick();
-    string s2 = this->GetRightChild()->GetNewick();
-    return string("(") + s1 + string(",") + s2 + string(")");
-  }
-}
-
-void RBTNode ::AddSiblingToLeaf(int lvid) {
-  // add a sibling to the current node, which must be a leaf
-  YW_ASSERT_INFO(IsLeaf() == true, "Can not add to a non-leaf node");
-
-  // create a new node
-  RBTNode *pnode = new RBTNode(lvid);
-
-  // add it
-  pnode->AttachSubtree(this);
-
-  // create a new node
-  // but remeber the parent first
-  //	RBTNode *ppar = GetParent();
-  //	bool fLeftChild = IsLeftChild();
-  //	YW_ASSERT_INFO( ppar != NULL, "Can not be NULL" );
-  //	RBTNode *pinternal = AddSibling( lvid );
-  // setup connection
-  //	pinternal->SetParent( ppar );
-  //	if( fLeftChild == true )
-  //	{
-  //		// add to the left
-  //		ppar->SetLeftChild( pinternal );
-  //	}
-  //	else
-  //	{
-  //		ppar->SetRightChild( pinternal );
-  //	}
-}
-
-void RBTNode ::OutputNodeGML(ofstream &outFile) {
-  outFile << "node [\n";
-  char name[100];
-  // the name is equal to it
-  if (IsLeaf() == true) {
-    name[0] = 'v';
-    sprintf(&name[1], "%d", GetLeafId());
-  } else {
-    name[0] = ' ';
-    name[1] = '\0';
-  }
-  outFile << "id " << GetLeafId() << endl;
-  outFile << "label ";
-  OutputQuotedString(outFile, name);
-  outFile << endl;
-  outFile << "defaultAtrribute   1\n";
-  outFile << "]\n";
-  // cout << "Output one node: id = " << GetId() << "\n";
-  // handle the children
-  if (IsLeaf() == false) {
-    GetLeftChild()->OutputNodeGML(outFile);
-    GetRightChild()->OutputNodeGML(outFile);
-  }
-}
-
-void RBTNode ::OutputEdgeGML(ofstream &outFile) {
-  char name[100];
-  int id1 = GetLeafId();
-  if (IsLeaf() == false) {
-    for (int i = 0; i < 2; ++i) {
-      int id2 = GetLeftChild()->GetLeafId();
-      if (i == 1) {
-        id2 = GetRightChild()->GetLeafId();
-      }
-
-      name[0] = ' ';
-      name[1] = '\0';
-      //		sprintf(&name[1], "%d-%d", id1, id2 );
-      // cout << "Output one edge: " << id1 << ", " << id2 << endl;
-
-      outFile << "edge [\n";
-      outFile << "source " << id1 << endl;
-      outFile << "target  " << id2 << endl;
-      outFile << "label ";
-      // cout << "edge label = " << name << endl;
-      OutputQuotedString(outFile, name);
-      outFile << "\n";
-      outFile << "]\n";
-    }
-  }
-  // handle the children
-  if (IsLeaf() == false) {
-    GetLeftChild()->OutputEdgeGML(outFile);
-    GetRightChild()->OutputEdgeGML(outFile);
-  }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-//
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-// different ways of initializing a tree
-// it can be by a supplied id
-RBT ::RBT(int numLeaves, RBT_ID tid) {
-  Init();
-
-  // save the id
-  this->numLeaves = numLeaves;
-  this->tid = tid;
-  YW_ASSERT_INFO(numLeaves >= 3, "Too few leaves");
+bool RBTNode ::RemoveLeafSelf()
+{
+    // only remove self if it is a leaf
+    if (IsLeaf() == false)
+    {
+        return false;
+    }
+    // remove this node
+    if (this->pParent != NULL)
+    {
+        // need to rearrange the tree to ensure binary shape
+        RBTNode *pOther = this->pParent->GetLeftChild();
+        if (IsLeftChild() == true)
+        {
+            //cout << "Switch to the right\n";
+            pOther = this->pParent->GetRightChild();
+        }
+        // skip the parent
+        pOther->SetParent(this->pParent->GetParent());
+        //cout << "after getparent\n";
+        if (this->pParent->GetParent() != NULL)
+        {
+            //cout << "Still need to set parent's parent\n";
+            // also need to ensure the proper pointer
+            if (pParent->IsLeftChild() == true)
+            {
+                pParent->GetParent()->SetLeftChild(pOther);
+            }
+            else
+            {
+                pParent->GetParent()->SetRightChild(pOther);
+            }
+        }
+        //cout << "delete the old parent\n";
+        // free up the parent
+        pParent->SetLeftChild(NULL);
+        pParent->SetRightChild(NULL);
+        delete this->pParent;
+        //delete this;
+
+        // make sure the left is ALWAYS smaller than RIGHT
+        // BUT SINCE WE ARE REMOVING IN DESCENDING ORDER
+        // so it does not matter here. But need to be fixed
+        // TBD
+    }
+    else
+    {
+        //delete this;
+    }
 
-  // construct by the tid
-  ReconstructById(tid);
+    //cout << "done\n";
+    return true;
 }
 
-RBT ::RBT(const RBT &rhs) {
-  this->numLeaves = rhs.numLeaves;
-  this->tid = rhs.tid;
-  this->pRoot = rhs.pRoot->CopySubTree();
+// access
+int RBTNode ::GetMinLeaveId()
+{
+    YW_ASSERT_INFO(IsLeaf() == true || (pLeft != NULL && pRight != NULL), "Children wrong.");
+    if (IsLeaf() == true)
+    {
+        return GetLeafId();
+    }
+    else
+    {
+        int lid = pLeft->GetMinLeaveId();
+        int rid = pRight->GetMinLeaveId();
+        if (lid < rid)
+        {
+            return lid;
+        }
+        else
+        {
+            return rid;
+        }
+    }
 }
 
-RBT &RBT ::operator=(const RBT &rhs) {
-  // get rid of current
-  if (this->pRoot != NULL) {
-    delete this->pRoot;
-    this->pRoot = NULL;
-  }
-
-  this->numLeaves = rhs.numLeaves;
-  this->tid = rhs.tid;
-  this->pRoot = rhs.pRoot->CopySubTree();
+RBTNode *RBTNode ::GetLeftMostChild()
+{
+    RBTNode *pcur = this;
+    while (pcur->IsLeaf() == false)
+    {
+        pcur = pcur->GetLeftChild();
+    }
+    return pcur;
+}
 
-  return *this;
+RBTNode *RBTNode ::GetSibling()
+{
+    if (GetParent() == NULL)
+    {
+        return NULL;
+    }
+    else
+    {
+        if (IsLeftChild() == true)
+        {
+            return GetParent()->GetRightChild();
+        }
+        else
+        {
+            return GetParent()->GetLeftChild();
+        }
+    }
 }
 
-RBT ::RBT(int numLeaves, const vector<int> &listNodeLabels,
-          const vector<int> &listParentNodePos,
-          const vector<double> &listEdgeDist) {
-  this->numLeaves = numLeaves;
-  this->tid = -1; // in this mode, we do not care about tid
-  this->pRoot = NULL;
-  // construct by the tid
-  ReconstructByPlainDesc(listNodeLabels, listParentNodePos, listEdgeDist);
+bool RBTNode ::IsLeaf() const
+{
+    return pLeft == NULL && pRight == NULL;
 }
 
-RBT ::~RBT() {
-  // cout << "INside destructor\n";
-  // cout << "number of leaves = " << pRoot->GetNumLeavesUnder() << endl;
-  this->pRoot->Clear();
-  delete pRoot;
-  pRoot = NULL;
-  // cout << "done with one destructor\n";
+int RBTNode ::GetNumLeavesUnder()
+{
+    //cout << "current node = " << (int) this << endl;
+    YW_ASSERT_INFO(IsLeaf() == true || (pLeft != NULL && pRight != NULL), "Children wrong.");
+    if (IsLeaf() == true)
+    {
+        return 1;
+    }
+    else
+    {
+        return pLeft->GetNumLeavesUnder() + pRight->GetNumLeavesUnder();
+    }
 }
 
-// ID functions
-RBT_ID RBT ::GetId() {
-  if (tid >= 0) {
-    // return the cached one
-    return tid;
-  }
-  // get it
-  this->tid = MapToId(); // indicate it is invalid
-  return this->tid;
-}
-
-void RBT ::OutputGML(const char *fileName) {
-  // Now output a file in GML format
-  // First create a new name
-  string name = fileName;
-  // cout << "num edges = " << listEdges.size() << endl;
-
-  DEBUG("FileName=");
-  DEBUG(name);
-  DEBUG("\n");
-  // Now open file to write out
-  ofstream outFile(name.c_str());
-
-  // First output some header info
-  outFile << "graph [\n";
-  outFile << "comment ";
-  OutputQuotedString(outFile, "Automatically generated by Graphing tool");
-  outFile << "\ndirected  1\n";
-  outFile << "id  1\n";
-  outFile << "label ";
-  OutputQuotedString(outFile, "To be more meaningful later....\n");
-  // cout << "Here we go\n";
-  // Now output all the vertices by simply calling through root node
-  pRoot->OutputNodeGML(outFile);
-
-  // Now output all the edges by calling through the root
-  pRoot->OutputEdgeGML(outFile);
-
-  // Finally quite after closing file
-  outFile << "\n]\n";
-  outFile.close();
+void RBTNode ::GetLeaves(set<int> &lvs)
+{
+    //cout << "Get leaves so far for node = " << (int) this << ":  ";
+    //DumpIntSet( lvs );
+    YW_ASSERT_INFO(IsLeaf() == true || (pLeft != NULL && pRight != NULL), "Children wrong.");
+    if (IsLeaf() == true)
+    {
+        lvs.insert(this->lvid);
+    }
+    else
+    {
+        pLeft->GetLeaves(lvs);
+        pRight->GetLeaves(lvs);
+    }
 }
 
-// splits functions
-bool RBT ::IsSplitContained(const set<int> &split) {
-  // simply check the map
-  if (mapSplitsInTree.size() == 0) {
-    // Need to figure out splits
-    RetrieveSplits();
-  }
-  return mapSplitsInTree.find(split) != mapSplitsInTree.end();
+bool RBTNode ::IsLeftChild()
+{
+    // if it has no parent, consider left
+    if (this->pParent == NULL)
+    {
+        return true;
+    }
+    if (this->pParent->GetLeftChild() == this)
+    {
+        return true;
+    }
+    else
+    {
+        return false;
+    }
 }
 
-void RBT ::GetAllSplits(vector<set<int> > &listSplits) {
-  if (mapSplitsInTree.size() == 0) {
-    // Need to figure out splits
-    RetrieveSplits();
-  }
+// memory. free recursively
+void RBTNode ::Clear()
+{
+    // NOTE: the current node is not deleted!!!!
+    // recursively delete
+    if (pLeft != NULL)
+    {
+        pLeft->Clear();
+        delete pLeft;
+        pLeft = NULL;
+    }
+    if (pRight != NULL)
+    {
+        pRight->Clear();
+        delete pRight;
+        pRight = NULL;
+    }
 
-  listSplits.clear();
-  for (map<set<int>, bool>::iterator it = mapSplitsInTree.begin();
-       it != mapSplitsInTree.end(); ++it) {
-    // put it
-    listSplits.push_back(it->first);
-  }
+    //delete this;
 }
 
-// SPR function
-void RBT ::FindSPRDistOneNgbrs(set<int> &ngbrIds) {
-  // Double loop: first try every subtree of the original
-  // then try to attach it to each of the original node
-  // note, we do not want to re-generate trees many times
-  // so we need to re-attach the detached subtrees each time we need
-  RBT treeOpt(*this);
-
-  TraversRecord tr;
-  treeOpt.InitPostorderTranvers(tr);
-  while (true) {
-    RBTNode *pCurNode = tr.pCurNode;
-    // cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " <<
-    // pCurNode->GetLeafId() << endl;
-    if (pCurNode->GetParent() == NULL) {
-      // do not do the whole tree to remove, that is not valid
-      break;
-    }
-
-    // remember the sibling so we can re-attach it at the end
-    RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
-    if (pSib == pCurNode) {
-      pSib = pCurNode->GetParent()->GetRightChild();
-    }
-
-    // now detach the subtree
-    // need to handle the special case when the root is removed
-    if (pCurNode->GetParent()->GetParent() == NULL) {
-      treeOpt.pRoot = pSib;
-    }
-    pCurNode->DetachSubtree();
-    // set<int> clvs;
-    // pCurNode->GetLeaves( clvs );
-    // cout << "Current subtree has leafs = ";
-    // DumpIntSet( clvs );
-    // set<int> rlvs;
-    // treeOpt.pRoot->GetLeaves( rlvs );
-    // cout << "Remaing tree has leafs = ";
-    // DumpIntSet( rlvs );
-    // cout << "Current subtree = ";
-    // treeOpt.Dump();
-
-    // now do another search
-    TraversRecord tr2;
-    treeOpt.InitPostorderTranvers(tr2);
-    while (true) {
-      // set<int> rlvs3;
-      // treeOpt.pRoot->GetLeaves( rlvs3 );
-      // cout << "During inner loop start, tree has leafs = ";
-      // DumpIntSet( rlvs3 );
-      // cout << "During internal loop, subtree = ";
-      // treeOpt.Dump();
-
-      // cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id = "
-      // << tr2.pCurNode->GetLeafId() << endl;
-      // try to re-attach to the node
-      RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
-      if (tr2.pCurNode == treeOpt.pRoot) {
-        // we created a new root
-        treeOpt.pRoot = pNewPar;
-      }
-
-      // get a maped id
-      ngbrIds.insert(treeOpt.MapToId());
-      // cout << "The SPR transformed subtree = ";
-      // treeOpt.Dump();
-
-      // now we need to detach the node again
-      if (pCurNode->GetParent()->IsRoot() == true) {
-        // when root is removed, we have to re-adjust the root
-        treeOpt.pRoot = tr2.pCurNode;
-      }
-      pCurNode->DetachSubtree();
-
-      // move to next
-      if (treeOpt.NextPostorderTranvers(tr2) == false) {
-        break;
-      }
-    }
-    // cout << "Now attach the current subtree...\n";
-    // now re-attach the node
-    RBTNode *pnode = pCurNode->AttachSubtree(pSib);
-    if (treeOpt.pRoot == pSib) {
-      // cout << "readjust root ...\n";
-      // we need to update the root again
-      treeOpt.pRoot = pnode;
-    }
-    // set<int> rlvs2;
-    // treeOpt.pRoot->GetLeaves( rlvs2 );
-    // cout << "After reattaching at the end of one round, tree has leafs = ";
-    // DumpIntSet( rlvs2 );
-    // cout << "After re-attaching the subtree = ";
-    // treeOpt.Dump();
-
-    // move to next
-    if (treeOpt.NextPostorderTranvers(tr) == false) {
-      break;
-    }
-  }
+void RBTNode ::AdjustLRChildUpwards()
+{
+    // this function re-adjust the left/right subtrees, starting
+    // from the current node, and upwards the tree
+    // This is because when something is removed, we have to
+    // make sure the tree topology is still what is like before:
+    // the left subtree must have its min-leaf lower than right
+    // subtree
+    RBTNode *pcur = this;
+    while (pcur != NULL)
+    {
+        //
+        if (pcur->IsLeaf() == false &&
+            pcur->GetLeftChild()->GetMinLeaveId() > pcur->GetRightChild()->GetMinLeaveId())
+        {
+            // switch it
+            RBTNode *ptmp = pcur->GetLeftChild();
+            pcur->SetLeftChild(pcur->GetRightChild());
+            pcur->SetRightChild(ptmp);
+        }
 
-#if 0
-	set<RBT> ngbrTrees;
-	FindSPRDistOneNgbrs(ngbrTrees);
-	for( set<RBT> :: iterator it = ngbrTrees.begin(); it != ngbrTrees.end(); ++it )
-	{
-		RBT tr = *it;
-		ngbrIds.insert( tr.MapToId()  );
-	}
-#endif
-  // get rid of the same tree
-  ngbrIds.erase(GetId());
-}
-
-void RBT ::FindSPRDistOneNgbrs(vector<RBT *> &ngbrTrees) {
-  // Double loop: first try every subtree of the original
-  // then try to attach it to each of the original node
-  // note, we do not want to re-generate trees many times
-  // so we need to re-attach the detached subtrees each time we need
-  RBT treeOpt(*this);
-  // cout << "RBT: find SPR ngbr: current tree: " << treeOpt.GetNewick() <<
-  // endl;
-
-  TraversRecord tr;
-  treeOpt.InitPostorderTranvers(tr);
-  while (true) {
-    RBTNode *pCurNode = tr.pCurNode;
-    // cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " <<
-    // pCurNode->GetLeafId() << endl;
-    if (pCurNode->GetParent() == NULL) {
-      // do not do the whole tree to remove, that is not valid
-      break;
-    }
-
-    // remember the sibling so we can re-attach it at the end
-    RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
-    if (pSib == pCurNode) {
-      pSib = pCurNode->GetParent()->GetRightChild();
-    }
-
-    // now detach the subtree
-    // need to handle the special case when the root is removed
-    if (pCurNode->GetParent()->GetParent() == NULL) {
-      treeOpt.pRoot = pSib;
-    }
-    pCurNode->DetachSubtree();
-    // set<int> clvs;
-    // pCurNode->GetLeaves( clvs );
-    // cout << "Current subtree has leafs = ";
-    // DumpIntSet( clvs );
-    // set<int> rlvs;
-    // treeOpt.pRoot->GetLeaves( rlvs );
-    // cout << "Remaing tree has leafs = ";
-    // DumpIntSet( rlvs );
-    // cout << "Current subtree = ";
-    // treeOpt.Dump();
-
-    // now do another search
-    TraversRecord tr2;
-    treeOpt.InitPostorderTranvers(tr2);
-    while (true) {
-      // set<int> rlvs3;
-      // treeOpt.pRoot->GetLeaves( rlvs3 );
-      // cout << "During inner loop start, tree has leafs = ";
-      // DumpIntSet( rlvs3 );
-      // cout << "During internal loop, subtree = ";
-      // treeOpt.Dump();
-
-      // cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id = "
-      // << tr2.pCurNode->GetLeafId() << endl;
-      // try to re-attach to the node
-      RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
-      if (tr2.pCurNode == treeOpt.pRoot) {
-        // we created a new root
-        treeOpt.pRoot = pNewPar;
-      }
-
-      // get a maped id
-      // Create a new tree and store it
-      RBT *pRbtStore = new RBT(treeOpt);
-      ngbrTrees.push_back(pRbtStore);
-      // ngbrIds.insert( treeOpt.MapToId()  );
-      // cout << "The SPR transformed subtree = ";
-      // cout << pRbtStore->GetNewick() << endl;
-      // treeOpt.Dump();
-
-      // now we need to detach the node again
-      if (pCurNode->GetParent()->IsRoot() == true) {
-        // when root is removed, we have to re-adjust the root
-        treeOpt.pRoot = tr2.pCurNode;
-      }
-      pCurNode->DetachSubtree();
-
-      // move to next
-      if (treeOpt.NextPostorderTranvers(tr2) == false) {
-        break;
-      }
-    }
-    // cout << "Now attach the current subtree...\n";
-    // now re-attach the node
-    RBTNode *pnode = pCurNode->AttachSubtree(pSib);
-    if (treeOpt.pRoot == pSib) {
-      // cout << "readjust root ...\n";
-      // we need to update the root again
-      treeOpt.pRoot = pnode;
-    }
-    // set<int> rlvs2;
-    // treeOpt.pRoot->GetLeaves( rlvs2 );
-    // cout << "After reattaching at the end of one round, tree has leafs = ";
-    // DumpIntSet( rlvs2 );
-    // cout << "After re-attaching the subtree = ";
-    // treeOpt.Dump();
-
-    // move to next
-    if (treeOpt.NextPostorderTranvers(tr) == false) {
-      break;
-    }
-  }
-}
-
-void RBT ::FindSPRDistOneNgbrsRestricted(vector<RBT *> &ngbrTrees,
-                                         const vector<RBT *> &ConstraintTrees) {
-  // this is slightly different from previous tree in that
-  // we want to narrow down on the number of ngbrs to test, thus
-  // we want to find more promising ngbrs. In particular,
-  // we want to ensure the source branch has a split
-  // that is at least one of the constraint trees
-  // because the source branch will continue to be one of the splits after
-  // transform also, the destination, after merging, the destination new split
-  // need to be in one of the constraint tree
-  RBT treeOpt(*this);
-  int nExcluded = 0;
-
-  TraversRecord tr;
-  treeOpt.InitPostorderTranvers(tr);
-  while (true) {
-    RBTNode *pCurNode = tr.pCurNode;
-    // cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " <<
-    // pCurNode->GetLeafId() << endl;
-    if (pCurNode->GetParent() == NULL) {
-      // do not do the whole tree to remove, that is not valid
-      break;
-    }
-
-    // make sure its leaves are under one of the constriant tree split
-    set<int> lvids;
-    pCurNode->GetLeaves(lvids);
-    // make complmenet if we need
-    if (lvids.find(0) == lvids.end()) {
-      set<int> tmpset;
-      PopulateSetWithInterval(tmpset, 0, this->numLeaves - 1);
-      SubtractSets(tmpset, lvids);
-      lvids = tmpset;
-    }
-    bool fContainsrc = false;
-    for (int ii = 0; ii < (int)ConstraintTrees.size(); ++ii) {
-      RBT *pt = ConstraintTrees[ii];
-      YW_ASSERT_INFO(pt != NULL, "wrong");
-      if (pt->IsSplitContained(lvids) == true) {
-        fContainsrc = true;
-        break;
-      }
-    }
-    if (fContainsrc == false) {
-      nExcluded++;
-    }
-
-    if (fContainsrc == true) {
-
-      // remember the sibling so we can re-attach it at the end
-      RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
-      if (pSib == pCurNode) {
-        pSib = pCurNode->GetParent()->GetRightChild();
-      }
-
-      // now detach the subtree
-      // need to handle the special case when the root is removed
-      if (pCurNode->GetParent()->GetParent() == NULL) {
-        treeOpt.pRoot = pSib;
-      }
-      pCurNode->DetachSubtree();
-      // set<int> clvs;
-      // pCurNode->GetLeaves( clvs );
-      // cout << "Current subtree has leafs = ";
-      // DumpIntSet( clvs );
-      // set<int> rlvs;
-      // treeOpt.pRoot->GetLeaves( rlvs );
-      // cout << "Remaing tree has leafs = ";
-      // DumpIntSet( rlvs );
-      // cout << "Current subtree = ";
-      // treeOpt.Dump();
-
-      // now do another search
-      TraversRecord tr2;
-      treeOpt.InitPostorderTranvers(tr2);
-      while (true) {
-        // set<int> rlvs3;
-        // treeOpt.pRoot->GetLeaves( rlvs3 );
-        // cout << "During inner loop start, tree has leafs = ";
-        // DumpIntSet( rlvs3 );
-        // cout << "During internal loop, subtree = ";
-        // treeOpt.Dump();
-
-        // cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id =
-        // " << tr2.pCurNode->GetLeafId() << endl;
-        // try to re-attach to the node
-        RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
-        if (tr2.pCurNode == treeOpt.pRoot) {
-          // we created a new root
-          treeOpt.pRoot = pNewPar;
-        }
-
-        // is pNewPar has a split that exists in one of constraint tree?
-        set<int> lvids2;
-        pNewPar->GetLeaves(lvids2);
-        // make complmenet if we need
-        if (lvids2.find(0) == lvids2.end()) {
-          set<int> tmpset;
-          PopulateSetWithInterval(tmpset, 0, this->numLeaves - 1);
-          SubtractSets(tmpset, lvids2);
-          lvids2 = tmpset;
-        }
-        bool fContainsrc2 = false;
-        for (int ii = 0; ii < (int)ConstraintTrees.size(); ++ii) {
-          RBT *pt = ConstraintTrees[ii];
-          YW_ASSERT_INFO(pt != NULL, "wrong");
-          if (pt->IsSplitContained(lvids2) == true) {
-            fContainsrc2 = true;
-            break;
-          }
+        // trace upwards
+        pcur = pcur->GetParent();
+    }
+}
+
+void RBTNode ::Dump() const
+{
+    // print leaf ids, nested in parentheses, with [height] when height >= 0;
+    // this simply does a post-order traversal
+    if (IsLeaf() == true)
+    {
+        cout << " " << this->lvid;
+        if (GetHeight() >= 0)
+        {
+            cout << "[" << GetHeight() << "]";
         }
-        if (fContainsrc2 == true) {
-          // get a maped id
-          // Create a new tree and store it
-          RBT *pRbtStore = new RBT(treeOpt);
-          ngbrTrees.push_back(pRbtStore);
-          // ngbrIds.insert( treeOpt.MapToId()  );
-          // cout << "The SPR transformed subtree = ";
-          // treeOpt.Dump();
+        cout << " ";
+    }
+    else
+    {
+        cout << "( ";
+        this->GetLeftChild()->Dump();
+        this->GetRightChild()->Dump();
+        cout << " )";
+        if (GetHeight() >= 0)
+        {
+            cout << "[" << GetHeight() << "]";
         }
+        cout << " ";
+    }
+}
 
-        // now we need to detach the node again
-        if (pCurNode->GetParent()->IsRoot() == true) {
-          // when root is removed, we have to re-adjust the root
-          treeOpt.pRoot = tr2.pCurNode;
-        }
-        pCurNode->DetachSubtree();
+string RBTNode ::GetNewick() const
+{
+    // Newick text of the subtree rooted here: a leaf is its decimal leaf id, an internal node is "(left,right)"
+    if (IsLeaf() == true)
+    {
+        char buf[100];
+        sprintf(buf, "%d", this->lvid);
+        return string(buf);
+    }
+    else
+    {
+        string s1 = this->GetLeftChild()->GetNewick();
+        string s2 = this->GetRightChild()->GetNewick();
+        return string("(") + s1 + string(",") + s2 + string(")");
+    }
+}
+
+void RBTNode ::AddSiblingToLeaf(int lvid)
+{
+    // Add a new leaf with id lvid as a sibling of the current node, which must be a leaf
+    YW_ASSERT_INFO(IsLeaf() == true, "Can not add to a non-leaf node");
+
+    // create the new leaf node carrying lvid
+    RBTNode *pnode = new RBTNode(lvid);
+
+    // attach the new leaf at this node; the returned node is not needed here
+    pnode->AttachSubtree(this);
+
+    // Disabled older approach, kept for reference: create the internal node by hand,
+    // remembering the parent first
+    //	RBTNode *ppar = GetParent();
+    //	bool fLeftChild = IsLeftChild();
+    //	YW_ASSERT_INFO( ppar != NULL, "Can not be NULL" );
+    //	RBTNode *pinternal = AddSibling( lvid );
+    // setup connection
+    //	pinternal->SetParent( ppar );
+    //	if( fLeftChild == true )
+    //	{
+    //		// add to the left
+    //		ppar->SetLeftChild( pinternal );
+    //	}
+    //	else
+    //	{
+    //		ppar->SetRightChild( pinternal );
+    //	}
+}
+
+void RBTNode ::OutputNodeGML(ofstream &outFile)
+{
+    outFile << "node [\n";
+    char name[100];
+    // label text: 'v' followed by the leaf id for a leaf, a single blank for an internal node
+    if (IsLeaf() == true)
+    {
+        name[0] = 'v';
+        sprintf(&name[1], "%d", GetLeafId());
+    }
+    else
+    {
+        name[0] = ' ';
+        name[1] = '\0';
+    }
+    outFile << "id " << GetLeafId() << endl;
+    outFile << "label ";
+    OutputQuotedString(outFile, name);
+    outFile << endl;
+    outFile << "defaultAtrribute   1\n";
+    outFile << "]\n";
+    //cout << "Output one node: id = " << GetId() << "\n";
+    // recurse so that both child subtrees emit their own node records
+    if (IsLeaf() == false)
+    {
+        GetLeftChild()->OutputNodeGML(outFile);
+        GetRightChild()->OutputNodeGML(outFile);
+    }
+}
 
-        // move to next
-        if (treeOpt.NextPostorderTranvers(tr2) == false) {
-          break;
+void RBTNode ::OutputEdgeGML(ofstream &outFile)
+{
+    char name[100];
+    int id1 = GetLeafId();
+    if (IsLeaf() == false)
+    {
+        for (int i = 0; i < 2; ++i)
+        {
+            int id2 = GetLeftChild()->GetLeafId();
+            if (i == 1)
+            {
+                id2 = GetRightChild()->GetLeafId();
+            }
+
+            name[0] = ' ';
+            name[1] = '\0';
+            //		sprintf(&name[1], "%d-%d", id1, id2 );
+            //cout << "Output one edge: " << id1 << ", " << id2 << endl;
+
+            outFile << "edge [\n";
+            outFile << "source " << id1 << endl;
+            outFile << "target  " << id2 << endl;
+            outFile << "label ";
+            //cout << "edge label = " << name << endl;
+            OutputQuotedString(outFile, name);
+            outFile << "\n";
+            outFile << "]\n";
         }
-      }
-      // cout << "Now attach the current subtree...\n";
-      // now re-attach the node
-      RBTNode *pnode = pCurNode->AttachSubtree(pSib);
-      if (treeOpt.pRoot == pSib) {
-        // cout << "readjust root ...\n";
-        // we need to update the root again
-        treeOpt.pRoot = pnode;
-      }
-      // set<int> rlvs2;
-      // treeOpt.pRoot->GetLeaves( rlvs2 );
-      // cout << "After reattaching at the end of one round, tree has leafs = ";
-      // DumpIntSet( rlvs2 );
-      // cout << "After re-attaching the subtree = ";
-      // treeOpt.Dump();
     }
+    // handle the children
+    if (IsLeaf() == false)
+    {
+        GetLeftChild()->OutputEdgeGML(outFile);
+        GetRightChild()->OutputEdgeGML(outFile);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+//
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////
+// different ways of initializing a tree
+// it can be by a supplied id
+RBT ::RBT(int numLeaves, RBT_ID tid)
+{
+    Init();
 
-    // move to next
-    if (treeOpt.NextPostorderTranvers(tr) == false) {
-      break;
+    // save the id
+    this->numLeaves = numLeaves;
+    this->tid = tid;
+    YW_ASSERT_INFO(numLeaves >= 3, "Too few leaves");
+
+    // construct by the tid
+    ReconstructById(tid);
+}
+
+RBT ::RBT(const RBT &rhs)
+{
+    this->numLeaves = rhs.numLeaves;
+    this->tid = rhs.tid;
+    this->pRoot = rhs.pRoot->CopySubTree();
+}
+
+RBT &RBT ::operator=(const RBT &rhs)
+{
+    // get rid of current
+    if (this->pRoot != NULL)
+    {
+        delete this->pRoot;
+        this->pRoot = NULL;
     }
-  }
 
-  cout << "excluded num = " << nExcluded << endl;
+    this->numLeaves = rhs.numLeaves;
+    this->tid = rhs.tid;
+    this->pRoot = rhs.pRoot->CopySubTree();
+
+    return *this;
 }
 
-// is tree SPR away from this
-bool RBT ::IsOneSPRAway(const RBT &rbt) const {
-  // testing whether it is one SPR away
-  // Simply try to morph the current tree t
-  // Double loop: first try every subtree of the original
-  // then try to attach it to each of the original node
-  // note, we do not want to re-generate trees many times
-  // so we need to re-attach the detached subtrees each time we need
-  // BUT, to make process fast, we need to reduce the tree as much as we can
-  //
-  RBT treeOpt(*this);
-  RBT treeCmp(rbt);
-
-  // reduce the two trees
-  Consolidate(treeOpt, treeCmp);
-  // cout <<"After consolidation, trees are: \n";
-  // treeOpt.Dump();
-  // treeCmp.Dump();
-
-  // first make an list of maps to nodes at tips
-  treeOpt.CollectTips();
-  treeCmp.CollectTips();
-  vector<RBTNode *> listTips1;
-  treeOpt.GetAllTips(listTips1);
-  // cout << "Find tip num = " << listTips1.size() << endl;
-  // store all pair of nodes s.t. it only appears in treeOpt
-  // In fact, if the preprocessing step is correct,
-  // a cherry (a pair of nodes) appears in treeA can NOT appear in treeB
-  map<pair<RBTNode *, RBTNode *>, bool> mapCherry1;
-  for (int i = 0; i < (int)listTips1.size(); ++i) {
-    // cout << "Processing tip = " << listTips1[i]->GetLeafId() << endl;
-    // get its sibling
-    RBTNode *pSib = listTips1[i]->GetSibling();
-    if (pSib->IsLeaf() == true) {
-      // cout << "Sibling is a LEAF...\n";
-      pair<RBTNode *, RBTNode *> pp;
-      // get rid of order
-      if ((long)pSib > (long)listTips1[i]) {
-        pp.first = listTips1[i];
-        pp.second = pSib;
-      } else {
-        pp.second = listTips1[i];
-        pp.first = pSib;
-      }
-      mapCherry1.insert(
-          map<pair<RBTNode *, RBTNode *>, bool>::value_type(pp, true));
-
-      // make sure preprocessing is correct
-      // by checking the situation at the other tree
-      // the same pair can NOT appear
-      RBTNode *pOther1 = treeCmp.GetTip(pp.first->GetLeafId());
-      RBTNode *pOtherSib = pOther1->GetSibling();
-      RBTNode *pOther2 = treeCmp.GetTip(pp.second->GetLeafId());
-      YW_ASSERT_INFO(pOtherSib != pOther2, "Tree preprocessing wrong");
-    }
-  }
-  // if there is more than 2 pair left, we are done
-  if (mapCherry1.size() >= 3) {
-    //
-    return false;
-  }
-  YW_ASSERT_INFO(mapCherry1.size() > 0 && mapCherry1.size() < 3,
-                 "Wrong: cherry number can not be empty");
-  //  In this case, pick one pair (say the first), and perform one SPR to get a
-  //  proper subset
-  // collect the list of leaf edges to try
-  // vector< RBTNode *> listLeafToBePruned, listRegraftDest;
-  // for( map< pair<RBTNode *, RBTNode *>, bool > :: iterator it =
-  // mapCherry1.begin(); it != mapCherry1.end(); ++it )
-  //{
-  //	listLeafToBePruned.push_back( it->first.first );
-  //	listLeafToBePruned.push_back( it->first.second );
-  //}
-  // also figure out the destination it has to be
-  // for(int i=0; i<(int)listLeafToBePruned.size();++i)
-  //{
-  //
-  //}
-
-  // first, if there is only one pair of tips, then the tree must be like a comb
-  RBTNode *pLeaf1 = NULL;
-  RBTNode *pLeaf2 = NULL;
-  RBTNode *pLeaf3 = NULL;
-  RBTNode *pLeaf4 = NULL;
-  map<pair<RBTNode *, RBTNode *>, bool>::iterator it = mapCherry1.begin();
-  pLeaf1 = it->first.first;
-  pLeaf2 = it->first.second;
-  it++;
-  if (it != mapCherry1.end()) {
-    pLeaf3 = it->first.first;
-    pLeaf4 = it->first.second;
-  }
-
-  // now start real comparasion
-  TraversRecord tr;
-  treeOpt.InitPostorderTranvers(tr);
-  while (true) {
-    RBTNode *pCurNode = tr.pCurNode;
-    // cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " <<
-    // pCurNode->GetLeafId() << endl;
-    if (pCurNode->GetParent() == NULL) {
-      // do not do the whole tree to remove, that is not valid
-      break;
-    }
-
-    // remember the sibling so we can re-attach it at the end
-    RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
-    if (pSib == pCurNode) {
-      pSib = pCurNode->GetParent()->GetRightChild();
-    }
-
-    // now detach the subtree
-    // need to handle the special case when the root is removed
-    if (pCurNode->GetParent()->GetParent() == NULL) {
-      treeOpt.pRoot = pSib;
-    }
-    pCurNode->DetachSubtree();
-    // set<int> clvs;
-    // pCurNode->GetLeaves( clvs );
-    // cout << "Current subtree has leafs = ";
-    // DumpIntSet( clvs );
-    // set<int> rlvs;
-    // treeOpt.pRoot->GetLeaves( rlvs );
-    // cout << "Remaing tree has leafs = ";
-    // DumpIntSet( rlvs );
-    // cout << "Current subtree = ";
-    // treeOpt.Dump();
-
-    // now do another search
-    TraversRecord tr2;
-    treeOpt.InitPostorderTranvers(tr2);
-    while (true) {
-      // set<int> rlvs3;
-      // treeOpt.pRoot->GetLeaves( rlvs3 );
-      // cout << "During inner loop start, tree has leafs = ";
-      // DumpIntSet( rlvs3 );
-      // cout << "During internal loop, subtree = ";
-      // treeOpt.Dump();
-
-      // make sure this node is what we need:
-      // (1) must be a leaf
-      if (tr2.pCurNode->IsLeaf() == true &&
-          ((mapCherry1.size() == 1 &&
-            (pCurNode == pLeaf1 || pCurNode == pLeaf2 ||
-             tr2.pCurNode == pLeaf1 || tr2.pCurNode == pLeaf2)) ||
-           (mapCherry1.size() == 2 &&
-            (((pCurNode == pLeaf1 || pCurNode == pLeaf2) &&
-              (tr2.pCurNode == pLeaf3 || tr2.pCurNode == pLeaf4)) ||
-             ((pCurNode == pLeaf3 || pCurNode == pLeaf4) &&
-              (tr2.pCurNode == pLeaf1 || tr2.pCurNode == pLeaf2)))))) {
-
-        // cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id =
-        // " << tr2.pCurNode->GetLeafId() << endl;
-        // try to re-attach to the node
-        RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
-        if (tr2.pCurNode == treeOpt.pRoot) {
-          // we created a new root
-          treeOpt.pRoot = pNewPar;
-        }
-
-        // Test whether the morphed tree is the SAME as the other
-        if (treeOpt.IsSame(treeCmp) == true) {
-          // find it
-          return true;
-          // cout << "The SPR transformed subtree = ";
-          // treeOpt.Dump();
-        }
-
-        // now we need to detach the node again
-        if (pCurNode->GetParent()->IsRoot() == true) {
-          // when root is removed, we have to re-adjust the root
-          treeOpt.pRoot = tr2.pCurNode;
-        }
-        pCurNode->DetachSubtree();
-      }
-      // move to next
-      if (treeOpt.NextPostorderTranvers(tr2) == false) {
-        break;
-      }
-    }
-    // cout << "Now attach the current subtree...\n";
-    // now re-attach the node
-    RBTNode *pnode = pCurNode->AttachSubtree(pSib);
-    if (treeOpt.pRoot == pSib) {
-      // cout << "readjust root ...\n";
-      // we need to update the root again
-      treeOpt.pRoot = pnode;
-    }
-    // set<int> rlvs2;
-    // treeOpt.pRoot->GetLeaves( rlvs2 );
-    // cout << "After reattaching at the end of one round, tree has leafs = ";
-    // DumpIntSet( rlvs2 );
-    // cout << "After re-attaching the subtree = ";
-    // treeOpt.Dump();
-
-    // move to next
-    if (treeOpt.NextPostorderTranvers(tr) == false) {
-      break;
-    }
-  }
-
-  // did not find
-  return false;
+RBT ::RBT(int numLeaves, const vector<int> &listNodeLabels, const vector<int> &listParentNodePos,
+          const vector<double> &listEdgeDist)
+{
+    this->numLeaves = numLeaves;
+    this->tid = -1; // no tid is supplied in this mode; a negative tid makes GetId() compute it on demand
+    this->pRoot = NULL;
+    // construct from the plain description (node labels, parent positions, edge distances), not from a tid
+    ReconstructByPlainDesc(listNodeLabels, listParentNodePos, listEdgeDist);
+}
+
+RBT ::~RBT()
+{
+    // tear down the tree: Clear() the root (presumably releases its descendants - confirm), then delete the root
+    //cout << "number of leaves = " << pRoot->GetNumLeavesUnder() << endl;
+    this->pRoot->Clear();
+    delete pRoot;
+    pRoot = NULL;
+    //cout << "done with one destructor\n";
+}
+
+// ID functions
+RBT_ID RBT ::GetId()
+{
+    if (tid >= 0)
+    {
+        // a non-negative tid is the cached id: return it directly
+        return tid;
+    }
+    // not cached yet: compute the id from the current tree
+    this->tid = MapToId(); // cache the computed id for later calls
+    return this->tid;
+}
+
+void RBT ::OutputGML(const char *fileName)
+{
+    // Write this tree to fileName in GML format: graph header, then all nodes, then all edges
+    // First keep the file name as a string
+    string name = fileName;
+    //cout << "num edges = " << listEdges.size() << endl;
+
+    DEBUG("FileName=");
+    DEBUG(name);
+    DEBUG("\n");
+    // Now open the file for writing (NOTE(review): the stream is not checked for an open failure)
+    ofstream outFile(name.c_str());
+
+    // First output some header info
+    outFile << "graph [\n";
+    outFile << "comment ";
+    OutputQuotedString(outFile, "Automatically generated by Graphing tool");
+    outFile << "\ndirected  1\n";
+    outFile << "id  1\n";
+    outFile << "label ";
+    OutputQuotedString(outFile, "To be more meaningful later....\n");
+    //cout << "Here we go\n";
+    // Now output all the vertices by recursing from the root node
+    pRoot->OutputNodeGML(outFile);
+
+    // Now output all the edges by recursing from the root node
+    pRoot->OutputEdgeGML(outFile);
+
+    // Finally close the graph block and then the file
+    outFile << "\n]\n";
+    outFile.close();
+}
+
+// splits functions
+bool RBT ::IsSplitContained(const set<int> &split)
+{
+    // Returns true iff split is a key of mapSplitsInTree
+    if (mapSplitsInTree.size() == 0)
+    {
+        // the map is still empty: call RetrieveSplits() first (presumably fills mapSplitsInTree - confirm)
+        RetrieveSplits();
+    }
+    return mapSplitsInTree.find(split) != mapSplitsInTree.end();
+}
+
+void RBT ::GetAllSplits(vector<set<int>> &listSplits)
+{
+    if (mapSplitsInTree.size() == 0)
+    {
+        // the map is still empty: call RetrieveSplits() first (presumably fills mapSplitsInTree - confirm)
+        RetrieveSplits();
+    }
+
+    listSplits.clear();
+    for (map<set<int>, bool>::iterator it = mapSplitsInTree.begin(); it != mapSplitsInTree.end(); ++it)
+    {
+        // copy each key (a split) of the map into the output list, in map order
+        listSplits.push_back(it->first);
+    }
+}
+
+// SPR function
+void RBT ::FindSPRDistOneNgbrs(set<int> &ngbrIds)
+{
+    // Double loop: first detach every subtree of (a copy of) the original,
+    // then try to attach it at each node of the remaining tree.
+    // Note: we do not want to re-generate trees many times,
+    // so the detached subtree is re-attached next to its old sibling after each round.
+    RBT treeOpt(*this);
 
-#if 0
-	// now start real comparasion
     TraversRecord tr;
     treeOpt.InitPostorderTranvers(tr);
-    while( true )
+    while (true)
     {
         RBTNode *pCurNode = tr.pCurNode;
-//cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " << pCurNode->GetLeafId() << endl;
-        if( pCurNode->GetParent() == NULL  )
+        //cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " << pCurNode->GetLeafId() << endl;
+        if (pCurNode->GetParent() == NULL)
         {
             // do not do the whole tree to remove, that is not valid
             break;
@@ -1198,64 +732,57 @@ bool RBT ::IsOneSPRAway(const RBT &rbt) const {
 
         // remember the sibling so we can re-attach it at the end
         RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
-        if( pSib == pCurNode )
+        if (pSib == pCurNode)
         {
             pSib = pCurNode->GetParent()->GetRightChild();
         }
 
-
-
         // now detach the subtree
         // need to handle the special case when the root is removed
-        if( pCurNode->GetParent()->GetParent() == NULL )
+        if (pCurNode->GetParent()->GetParent() == NULL)
         {
             treeOpt.pRoot = pSib;
         }
         pCurNode->DetachSubtree();
-//set<int> clvs;
-//pCurNode->GetLeaves( clvs );
-//cout << "Current subtree has leafs = ";
-//DumpIntSet( clvs );
-//set<int> rlvs;
-//treeOpt.pRoot->GetLeaves( rlvs );
-//cout << "Remaing tree has leafs = ";
-//DumpIntSet( rlvs );
-//cout << "Current subtree = ";
-//treeOpt.Dump();
+        //set<int> clvs;
+        //pCurNode->GetLeaves( clvs );
+        //cout << "Current subtree has leafs = ";
+        //DumpIntSet( clvs );
+        //set<int> rlvs;
+        //treeOpt.pRoot->GetLeaves( rlvs );
+        //cout << "Remaing tree has leafs = ";
+        //DumpIntSet( rlvs );
+        //cout << "Current subtree = ";
+        //treeOpt.Dump();
 
         // now do another search
         TraversRecord tr2;
-        treeOpt.InitPostorderTranvers( tr2 );
-        while(true)
+        treeOpt.InitPostorderTranvers(tr2);
+        while (true)
         {
-//set<int> rlvs3;
-//treeOpt.pRoot->GetLeaves( rlvs3 );
-//cout << "During inner loop start, tree has leafs = ";
-//DumpIntSet( rlvs3 );
-//cout << "During internal loop, subtree = ";
-//treeOpt.Dump();
-
-
-//cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id = " << tr2.pCurNode->GetLeafId() << endl;
+            //set<int> rlvs3;
+            //treeOpt.pRoot->GetLeaves( rlvs3 );
+            //cout << "During inner loop start, tree has leafs = ";
+            //DumpIntSet( rlvs3 );
+            //cout << "During internal loop, subtree = ";
+            //treeOpt.Dump();
+
+            //cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id = " << tr2.pCurNode->GetLeafId() << endl;
             // try to re-attach to the node
             RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
-            if( tr2.pCurNode == treeOpt.pRoot )
+            if (tr2.pCurNode == treeOpt.pRoot)
             {
                 // we created a new root
                 treeOpt.pRoot = pNewPar;
             }
 
-            // Test whether the morphed tree is the SAME as the other
-            if (treeOpt.IsSame( treeCmp ) == true  )
-			{
-				// find it
-				return true;
-//cout << "The SPR transformed subtree = ";
-//treeOpt.Dump();
-			}
+            // record the mapped id of the rearranged tree
+            ngbrIds.insert(treeOpt.MapToId());
+            //cout << "The SPR transformed subtree = ";
+            //treeOpt.Dump();
 
             // now we need to detach the node again
-            if( pCurNode->GetParent()->IsRoot() == true  )
+            if (pCurNode->GetParent()->IsRoot() == true)
             {
                 // when root is removed, we have to re-adjust the root
                 treeOpt.pRoot = tr2.pCurNode;
@@ -1263,49 +790,550 @@ bool RBT ::IsOneSPRAway(const RBT &rbt) const {
             pCurNode->DetachSubtree();
 
             // move to next
-            if( treeOpt.NextPostorderTranvers(tr2) == false )
+            if (treeOpt.NextPostorderTranvers(tr2) == false)
             {
                 break;
             }
-
         }
-//cout << "Now attach the current subtree...\n";
+        //cout << "Now attach the current subtree...\n";
         // now re-attach the node
-        RBTNode *pnode = pCurNode->AttachSubtree( pSib );
-        if( treeOpt.pRoot == pSib )
+        RBTNode *pnode = pCurNode->AttachSubtree(pSib);
+        if (treeOpt.pRoot == pSib)
         {
-//cout << "readjust root ...\n";
+            //cout << "readjust root ...\n";
             // we need to update the root again
             treeOpt.pRoot = pnode;
         }
-//set<int> rlvs2;
-//treeOpt.pRoot->GetLeaves( rlvs2 );
-//cout << "After reattaching at the end of one round, tree has leafs = ";
-//DumpIntSet( rlvs2 );
-//cout << "After re-attaching the subtree = ";
-//treeOpt.Dump();
+        //set<int> rlvs2;
+        //treeOpt.pRoot->GetLeaves( rlvs2 );
+        //cout << "After reattaching at the end of one round, tree has leafs = ";
+        //DumpIntSet( rlvs2 );
+        //cout << "After re-attaching the subtree = ";
+        //treeOpt.Dump();
 
         // move to next
-        if( treeOpt.NextPostorderTranvers(tr) == false )
+        if (treeOpt.NextPostorderTranvers(tr) == false)
         {
             break;
         }
     }
 
-	// did not find
-	return false;
+#if 0
+	set<RBT> ngbrTrees;
+	FindSPRDistOneNgbrs(ngbrTrees);
+	for( set<RBT> :: iterator it = ngbrTrees.begin(); it != ngbrTrees.end(); ++it )
+	{
+		RBT tr = *it;
+		ngbrIds.insert( tr.MapToId()  );
+	}
 #endif
+    // get rid of the same tree
+    ngbrIds.erase(GetId());
+}
 
-#if 0
-	// testing whether it is one SPR away
-	// Simply try to morph the current tree t
+void RBT ::FindSPRDistOneNgbrs(vector<RBT *> &ngbrTrees)
+{
+    // Enumerate single-SPR rearrangements on a working copy (treeOpt): the outer postorder loop
+    // prunes each non-root node in turn, and the inner loop regrafts it onto every node still in the tree.
+    // Every regrafted tree is heap-copied (new RBT) and appended to ngbrTrees; nothing is de-duplicated
+    // or freed here (presumably the caller deletes them). The subtree is re-attached to its old sibling after each round.
+    RBT treeOpt(*this);
+    //cout << "RBT: find SPR ngbr: current tree: " << treeOpt.GetNewick() << endl;
+
+    TraversRecord tr;
+    treeOpt.InitPostorderTranvers(tr);
+    while (true)
+    {
+        RBTNode *pCurNode = tr.pCurNode;
+        //cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " << pCurNode->GetLeafId() << endl;
+        if (pCurNode->GetParent() == NULL)
+        {
+            // this node has no parent (it is the root): pruning the whole tree is not valid, so stop
+            break;
+        }
+
+        // remember the sibling so we can re-attach it at the end
+        RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
+        if (pSib == pCurNode)
+        {
+            pSib = pCurNode->GetParent()->GetRightChild();
+        }
+
+        // now detach the subtree
+        // special case: the parent is the root, so after pruning the sibling becomes the root
+        if (pCurNode->GetParent()->GetParent() == NULL)
+        {
+            treeOpt.pRoot = pSib;
+        }
+        pCurNode->DetachSubtree();
+        //set<int> clvs;
+        //pCurNode->GetLeaves( clvs );
+        //cout << "Current subtree has leafs = ";
+        //DumpIntSet( clvs );
+        //set<int> rlvs;
+        //treeOpt.pRoot->GetLeaves( rlvs );
+        //cout << "Remaing tree has leafs = ";
+        //DumpIntSet( rlvs );
+        //cout << "Current subtree = ";
+        //treeOpt.Dump();
+
+        // inner search: walk the remaining tree and try every node as a regraft target
+        TraversRecord tr2;
+        treeOpt.InitPostorderTranvers(tr2);
+        while (true)
+        {
+            //set<int> rlvs3;
+            //treeOpt.pRoot->GetLeaves( rlvs3 );
+            //cout << "During inner loop start, tree has leafs = ";
+            //DumpIntSet( rlvs3 );
+            //cout << "During internal loop, subtree = ";
+            //treeOpt.Dump();
+
+            //cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id = " << tr2.pCurNode->GetLeafId() << endl;
+            // try to re-attach to the node
+            RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
+            if (tr2.pCurNode == treeOpt.pRoot)
+            {
+                // we created a new root
+                treeOpt.pRoot = pNewPar;
+            }
+
+            // snapshot the rearranged tree:
+            // create a new (heap-allocated) copy and store it in the output list
+            RBT *pRbtStore = new RBT(treeOpt);
+            ngbrTrees.push_back(pRbtStore);
+            //ngbrIds.insert( treeOpt.MapToId()  );
+            //cout << "The SPR transformed subtree = ";
+            //cout << pRbtStore->GetNewick() << endl;
+            //treeOpt.Dump();
+
+            // now we need to detach the node again
+            if (pCurNode->GetParent()->IsRoot() == true)
+            {
+                // when root is removed, we have to re-adjust the root
+                treeOpt.pRoot = tr2.pCurNode;
+            }
+            pCurNode->DetachSubtree();
+
+            // move to next
+            if (treeOpt.NextPostorderTranvers(tr2) == false)
+            {
+                break;
+            }
+        }
+        //cout << "Now attach the current subtree...\n";
+        // now re-attach the node to its original sibling, restoring the starting topology
+        RBTNode *pnode = pCurNode->AttachSubtree(pSib);
+        if (treeOpt.pRoot == pSib)
+        {
+            //cout << "readjust root ...\n";
+            // we need to update the root again
+            treeOpt.pRoot = pnode;
+        }
+        //set<int> rlvs2;
+        //treeOpt.pRoot->GetLeaves( rlvs2 );
+        //cout << "After reattaching at the end of one round, tree has leafs = ";
+        //DumpIntSet( rlvs2 );
+        //cout << "After re-attaching the subtree = ";
+        //treeOpt.Dump();
+
+        // move to next
+        if (treeOpt.NextPostorderTranvers(tr) == false)
+        {
+            break;
+        }
+    }
+}
+
+void RBT ::FindSPRDistOneNgbrsRestricted(vector<RBT *> &ngbrTrees, const vector<RBT *> &ConstraintTrees)
+{
+    // Restricted variant of the one-SPR neighbour enumeration: fewer, more promising
+    // neighbours are produced by filtering candidates against the constraint trees.
+    // (1) Source filter: a subtree is pruned only if its leaf set (replaced by its complement
+    //     when it does not contain leaf 0) passes IsSplitContained() for at least one constraint
+    //     tree, because the source branch will continue to be a split after the transform.
+    // (2) Destination filter: after regrafting, the new parent's leaf set (normalised the same
+    //     way) must also pass IsSplitContained() for at least one constraint tree.
+    // Accepted trees are heap-copied into ngbrTrees; the number of skipped sources is printed to cout.
+    RBT treeOpt(*this);
+    int nExcluded = 0;
+
+    TraversRecord tr;
+    treeOpt.InitPostorderTranvers(tr);
+    while (true)
+    {
+        RBTNode *pCurNode = tr.pCurNode;
+        //cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " << pCurNode->GetLeafId() << endl;
+        if (pCurNode->GetParent() == NULL)
+        {
+            // this node has no parent (it is the root): pruning the whole tree is not valid, so stop
+            break;
+        }
+
+        // make sure its leaves are under one of the constraint tree splits
+        set<int> lvids;
+        pCurNode->GetLeaves(lvids);
+        // take the complement if needed, so the side containing leaf 0 is always the one tested
+        if (lvids.find(0) == lvids.end())
+        {
+            set<int> tmpset;
+            PopulateSetWithInterval(tmpset, 0, this->numLeaves - 1);
+            SubtractSets(tmpset, lvids);
+            lvids = tmpset;
+        }
+        bool fContainsrc = false;
+        for (int ii = 0; ii < (int)ConstraintTrees.size(); ++ii)
+        {
+            RBT *pt = ConstraintTrees[ii];
+            YW_ASSERT_INFO(pt != NULL, "wrong");
+            if (pt->IsSplitContained(lvids) == true)
+            {
+                fContainsrc = true;
+                break;
+            }
+        }
+        if (fContainsrc == false)
+        {
+            nExcluded++;
+        }
+
+        if (fContainsrc == true)
+        {
+
+            // remember the sibling so we can re-attach it at the end
+            RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
+            if (pSib == pCurNode)
+            {
+                pSib = pCurNode->GetParent()->GetRightChild();
+            }
+
+            // now detach the subtree
+            // special case: the parent is the root, so after pruning the sibling becomes the root
+            if (pCurNode->GetParent()->GetParent() == NULL)
+            {
+                treeOpt.pRoot = pSib;
+            }
+            pCurNode->DetachSubtree();
+            //set<int> clvs;
+            //pCurNode->GetLeaves( clvs );
+            //cout << "Current subtree has leafs = ";
+            //DumpIntSet( clvs );
+            //set<int> rlvs;
+            //treeOpt.pRoot->GetLeaves( rlvs );
+            //cout << "Remaing tree has leafs = ";
+            //DumpIntSet( rlvs );
+            //cout << "Current subtree = ";
+            //treeOpt.Dump();
+
+            // inner search: walk the remaining tree and try every node as a regraft target
+            TraversRecord tr2;
+            treeOpt.InitPostorderTranvers(tr2);
+            while (true)
+            {
+                //set<int> rlvs3;
+                //treeOpt.pRoot->GetLeaves( rlvs3 );
+                //cout << "During inner loop start, tree has leafs = ";
+                //DumpIntSet( rlvs3 );
+                //cout << "During internal loop, subtree = ";
+                //treeOpt.Dump();
+
+                //cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id = " << tr2.pCurNode->GetLeafId() << endl;
+                // try to re-attach to the node
+                RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
+                if (tr2.pCurNode == treeOpt.pRoot)
+                {
+                    // we created a new root
+                    treeOpt.pRoot = pNewPar;
+                }
+
+                // does pNewPar have a split that exists in one of the constraint trees?
+                set<int> lvids2;
+                pNewPar->GetLeaves(lvids2);
+                // take the complement if needed, so the side containing leaf 0 is always the one tested
+                if (lvids2.find(0) == lvids2.end())
+                {
+                    set<int> tmpset;
+                    PopulateSetWithInterval(tmpset, 0, this->numLeaves - 1);
+                    SubtractSets(tmpset, lvids2);
+                    lvids2 = tmpset;
+                }
+                bool fContainsrc2 = false;
+                for (int ii = 0; ii < (int)ConstraintTrees.size(); ++ii)
+                {
+                    RBT *pt = ConstraintTrees[ii];
+                    YW_ASSERT_INFO(pt != NULL, "wrong");
+                    if (pt->IsSplitContained(lvids2) == true)
+                    {
+                        fContainsrc2 = true;
+                        break;
+                    }
+                }
+                if (fContainsrc2 == true)
+                {
+                    // destination accepted: snapshot the rearranged tree
+                    // create a new (heap-allocated) copy and store it in the output list
+                    RBT *pRbtStore = new RBT(treeOpt);
+                    ngbrTrees.push_back(pRbtStore);
+                    //ngbrIds.insert( treeOpt.MapToId()  );
+                    //cout << "The SPR transformed subtree = ";
+                    //treeOpt.Dump();
+                }
+
+                // now we need to detach the node again
+                if (pCurNode->GetParent()->IsRoot() == true)
+                {
+                    // when root is removed, we have to re-adjust the root
+                    treeOpt.pRoot = tr2.pCurNode;
+                }
+                pCurNode->DetachSubtree();
+
+                // move to next
+                if (treeOpt.NextPostorderTranvers(tr2) == false)
+                {
+                    break;
+                }
+            }
+            //cout << "Now attach the current subtree...\n";
+            // now re-attach the node to its original sibling, restoring the starting topology
+            RBTNode *pnode = pCurNode->AttachSubtree(pSib);
+            if (treeOpt.pRoot == pSib)
+            {
+                //cout << "readjust root ...\n";
+                // we need to update the root again
+                treeOpt.pRoot = pnode;
+            }
+            //set<int> rlvs2;
+            //treeOpt.pRoot->GetLeaves( rlvs2 );
+            //cout << "After reattaching at the end of one round, tree has leafs = ";
+            //DumpIntSet( rlvs2 );
+            //cout << "After re-attaching the subtree = ";
+            //treeOpt.Dump();
+        }
+
+        // move to next
+        if (treeOpt.NextPostorderTranvers(tr) == false)
+        {
+            break;
+        }
+    }
+
+    cout << "excluded num = " << nExcluded << endl;
+}
+
+// Test whether the given tree (rbt) is one SPR move away from this tree
+bool RBT ::IsOneSPRAway(const RBT &rbt) const
+{
+    // testing whether it is one SPR away
+    // Simply try to morph the current tree t
     // Double loop: first try every subtree of the original
     // then try to attach it to each of the original node
     // note, we do not want to re-generate trees many times
     // so we need to re-attach the detached subtrees each time we need
+    // BUT, to make process fast, we need to reduce the tree as much as we can
+    //
     RBT treeOpt(*this);
+    RBT treeCmp(rbt);
+
+    // reduce the two trees
+    Consolidate(treeOpt, treeCmp);
+    //cout <<"After consolidation, trees are: \n";
+    //treeOpt.Dump();
+    //treeCmp.Dump();
+
+    // first collect the tip (leaf) nodes of both trees and list those of treeOpt
+    treeOpt.CollectTips();
+    treeCmp.CollectTips();
+    vector<RBTNode *> listTips1;
+    treeOpt.GetAllTips(listTips1);
+    //cout << "Find tip num = " << listTips1.size() << endl;
+    // store all pair of nodes s.t. it only appears in treeOpt
+    // In fact, if the preprocessing step is correct,
+    // a cherry (a pair of nodes) appears in treeA can NOT appear in treeB
+    map<pair<RBTNode *, RBTNode *>, bool> mapCherry1;
+    for (int i = 0; i < (int)listTips1.size(); ++i)
+    {
+        //cout << "Processing tip = " << listTips1[i]->GetLeafId() << endl;
+        // get its sibling
+        RBTNode *pSib = listTips1[i]->GetSibling();
+        if (pSib->IsLeaf() == true)
+        {
+            //cout << "Sibling is a LEAF...\n";
+            pair<RBTNode *, RBTNode *> pp;
+            // get rid of order
+            if ((long)pSib > (long)listTips1[i])
+            {
+                pp.first = listTips1[i];
+                pp.second = pSib;
+            }
+            else
+            {
+                pp.second = listTips1[i];
+                pp.first = pSib;
+            }
+            mapCherry1.insert(map<pair<RBTNode *, RBTNode *>, bool>::value_type(pp, true));
+
+            // make sure preprocessing is correct
+            // by checking the situation at the other tree
+            // the same pair can NOT appear
+            RBTNode *pOther1 = treeCmp.GetTip(pp.first->GetLeafId());
+            RBTNode *pOtherSib = pOther1->GetSibling();
+            RBTNode *pOther2 = treeCmp.GetTip(pp.second->GetLeafId());
+            YW_ASSERT_INFO(pOtherSib != pOther2, "Tree preprocessing wrong");
+        }
+    }
+    // if more than 2 pairs (cherries) are left, we are done: report "not one SPR away"
+    if (mapCherry1.size() >= 3)
+    {
+        //
+        return false;
+    }
+    YW_ASSERT_INFO(mapCherry1.size() > 0 && mapCherry1.size() < 3, "Wrong: cherry number can not be empty");
+    //  In this case, pick one pair (say the first), and perform one SPR to get a proper subset
+    // collect the list of leaf edges to try
+    //vector< RBTNode *> listLeafToBePruned, listRegraftDest;
+    //for( map< pair<RBTNode *, RBTNode *>, bool > :: iterator it = mapCherry1.begin(); it != mapCherry1.end(); ++it )
+    //{
+    //	listLeafToBePruned.push_back( it->first.first );
+    //	listLeafToBePruned.push_back( it->first.second );
+    //}
+    // also figure out the destination it has to be
+    //for(int i=0; i<(int)listLeafToBePruned.size();++i)
+    //{
+    //
+    //}
+
+    // first, if there is only one pair of tips, then the tree must be like a comb
+    RBTNode *pLeaf1 = NULL;
+    RBTNode *pLeaf2 = NULL;
+    RBTNode *pLeaf3 = NULL;
+    RBTNode *pLeaf4 = NULL;
+    map<pair<RBTNode *, RBTNode *>, bool>::iterator it = mapCherry1.begin();
+    pLeaf1 = it->first.first;
+    pLeaf2 = it->first.second;
+    it++;
+    if (it != mapCherry1.end())
+    {
+        pLeaf3 = it->first.first;
+        pLeaf4 = it->first.second;
+    }
+
+    // now start the real comparison
+    TraversRecord tr;
+    treeOpt.InitPostorderTranvers(tr);
+    while (true)
+    {
+        RBTNode *pCurNode = tr.pCurNode;
+        //cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " << pCurNode->GetLeafId() << endl;
+        if (pCurNode->GetParent() == NULL)
+        {
+            // do not do the whole tree to remove, that is not valid
+            break;
+        }
 
+        // remember the sibling so we can re-attach it at the end
+        RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
+        if (pSib == pCurNode)
+        {
+            pSib = pCurNode->GetParent()->GetRightChild();
+        }
+
+        // now detach the subtree
+        // need to handle the special case when the root is removed
+        if (pCurNode->GetParent()->GetParent() == NULL)
+        {
+            treeOpt.pRoot = pSib;
+        }
+        pCurNode->DetachSubtree();
+        //set<int> clvs;
+        //pCurNode->GetLeaves( clvs );
+        //cout << "Current subtree has leafs = ";
+        //DumpIntSet( clvs );
+        //set<int> rlvs;
+        //treeOpt.pRoot->GetLeaves( rlvs );
+        //cout << "Remaing tree has leafs = ";
+        //DumpIntSet( rlvs );
+        //cout << "Current subtree = ";
+        //treeOpt.Dump();
+
+        // now do another search
+        TraversRecord tr2;
+        treeOpt.InitPostorderTranvers(tr2);
+        while (true)
+        {
+            //set<int> rlvs3;
+            //treeOpt.pRoot->GetLeaves( rlvs3 );
+            //cout << "During inner loop start, tree has leafs = ";
+            //DumpIntSet( rlvs3 );
+            //cout << "During internal loop, subtree = ";
+            //treeOpt.Dump();
+
+            // make sure this node is what we need:
+            // (1) must be a leaf
+            if (tr2.pCurNode->IsLeaf() == true &&
+                ((mapCherry1.size() == 1 && (pCurNode == pLeaf1 || pCurNode == pLeaf2 || tr2.pCurNode == pLeaf1 || tr2.pCurNode == pLeaf2)) || (mapCherry1.size() == 2 &&
+                                                                                                                                                (((pCurNode == pLeaf1 || pCurNode == pLeaf2) && (tr2.pCurNode == pLeaf3 || tr2.pCurNode == pLeaf4)) || ((pCurNode == pLeaf3 || pCurNode == pLeaf4) && (tr2.pCurNode == pLeaf1 || tr2.pCurNode == pLeaf2))))))
+            {
+
+                //cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id = " << tr2.pCurNode->GetLeafId() << endl;
+                // try to re-attach to the node
+                RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
+                if (tr2.pCurNode == treeOpt.pRoot)
+                {
+                    // we created a new root
+                    treeOpt.pRoot = pNewPar;
+                }
+
+                // Test whether the morphed tree is the SAME as the other
+                if (treeOpt.IsSame(treeCmp) == true)
+                {
+                    // find it
+                    return true;
+                    //cout << "The SPR transformed subtree = ";
+                    //treeOpt.Dump();
+                }
+
+                // now we need to detach the node again
+                if (pCurNode->GetParent()->IsRoot() == true)
+                {
+                    // when root is removed, we have to re-adjust the root
+                    treeOpt.pRoot = tr2.pCurNode;
+                }
+                pCurNode->DetachSubtree();
+            }
+            // move to next
+            if (treeOpt.NextPostorderTranvers(tr2) == false)
+            {
+                break;
+            }
+        }
+        //cout << "Now attach the current subtree...\n";
+        // now re-attach the node
+        RBTNode *pnode = pCurNode->AttachSubtree(pSib);
+        if (treeOpt.pRoot == pSib)
+        {
+            //cout << "readjust root ...\n";
+            // we need to update the root again
+            treeOpt.pRoot = pnode;
+        }
+        //set<int> rlvs2;
+        //treeOpt.pRoot->GetLeaves( rlvs2 );
+        //cout << "After reattaching at the end of one round, tree has leafs = ";
+        //DumpIntSet( rlvs2 );
+        //cout << "After re-attaching the subtree = ";
+        //treeOpt.Dump();
+
+        // move to next
+        if (treeOpt.NextPostorderTranvers(tr) == false)
+        {
+            break;
+        }
+    }
+
+    // did not find
+    return false;
 
+#if 0
+	// now start real comparasion
     TraversRecord tr;
     treeOpt.InitPostorderTranvers(tr);
     while( true )
@@ -1368,7 +1396,7 @@ bool RBT ::IsOneSPRAway(const RBT &rbt) const {
             }
 
             // Test whether the morphed tree is the SAME as the other
-            if (treeOpt.IsSame( rbt ) == true  )
+            if (treeOpt.IsSame( treeCmp ) == true  )
 			{
 				// find it
 				return true;
@@ -1417,1011 +1445,1263 @@ bool RBT ::IsOneSPRAway(const RBT &rbt) const {
 	// did not find
 	return false;
 #endif
-}
 
-// 11/15/07: found an error: sometimes it passed in an invalid tree, then we get
-// problems TBD. need to figure out why this is happening This function is to
-// reduce two trees, such that the two trees' common parts are removed, only
-// different parts are left
-void RBT ::Consolidate(RBT &treeOpt, RBT &treeCmp) {
-  // cout << "ENTERING consolidate....\n";
-  YW_ASSERT_INFO(treeOpt.GetNodesNum() == treeCmp.GetNodesNum(),
-                 "Tree must be the same");
-  // create a map of leaf nodes ofr cmp tree
-  map<int, RBTNode *> mapCmpTreeLeafNodes;
-  TraversRecord tr1;
-  treeCmp.InitPostorderTranvers(tr1);
-  while (true) {
-    if (tr1.pCurNode->IsLeaf() == true) {
-      mapCmpTreeLeafNodes.insert(map<int, RBTNode *>::value_type(
-          tr1.pCurNode->GetLeafId(), tr1.pCurNode));
-    }
-    if (treeCmp.NextPostorderTranvers(tr1) == false) {
-      break;
-    }
-  }
-
-  // cout << "here1\n";
-  // reduce the two trees so that there is shared subtrees in them
-  // I do not understand why ONE-PATH does not work. Here just repeat until no
-  // nodes can be deleted
-  bool fNothingFound = false;
-  while (fNothingFound == false) {
-    // cout << "Current trees = ";
-    // treeOpt.Dump();
-    // treeCmp.Dump();
-    fNothingFound = true;
-    TraversRecord tr;
-    treeOpt.InitPostorderTranvers(tr);
-    bool fNodeDeleted = false;
-    while (true) {
-      //
-      if (tr.pCurNode->IsLeaf() == true) {
-        //
-        // if( tr.pCurNode->IsLeftChild() == true )
-        // we we start to delete, we only look for left child for now
-        YW_ASSERT_INFO(tr.pCurNode->GetParent() != NULL,
-                       "Can not be like this");
-        RBTNode *psib = tr.pCurNode->GetSibling();
-        YW_ASSERT_INFO(psib != NULL, "Wrong1.1.0");
-        if (psib->IsLeaf() == true) {
-          // now try to get the corresponding leaf in the other tree
-          RBTNode *pLeaf1Cmp = mapCmpTreeLeafNodes[tr.pCurNode->GetLeafId()];
-          RBTNode *pLeaf2Cmp = mapCmpTreeLeafNodes[psib->GetLeafId()];
-          if (pLeaf1Cmp == NULL) {
-            // treeOpt.Dump();
-            // treeCmp.Dump();
-            cout << "This node has been delted: " << tr.pCurNode->GetLeafId()
-                 << endl;
-          }
-          if (pLeaf2Cmp == NULL) {
-            // treeOpt.Dump();
-            // treeCmp.Dump();
-            cout << "This node has been delted: " << psib->GetLeafId() << endl;
-          }
-          // YW: for now, continue, need to fix it later. 11/15/07
-          YW_ASSERT_INFO(pLeaf1Cmp != NULL && pLeaf2Cmp != NULL, "Wrong1.1.1");
-          if (pLeaf1Cmp->GetParent() == pLeaf2Cmp->GetParent()) {
-
-            // Good, we find a pair, now we remove the right node
-            fNodeDeleted = true;
-            fNothingFound = false;
-            int sibidCmp = psib->GetLeafId();
-            pLeaf2Cmp->RemoveLeafSelf();
-            delete pLeaf2Cmp;
-            pLeaf2Cmp = NULL;
-            mapCmpTreeLeafNodes[sibidCmp] = NULL;
-
-            psib->RemoveLeafSelf();
-            delete psib;
-            psib = NULL;
-            // cout << "Leaf " << sibidCmp << " is deleted\n";
-            // if( tr.pCurNode->IsLeftChild() == false )
-            // if( tr.pCurNode-> )
-            //{
-            // Update current to left child
-            //
-            //	tr.pCurNode = psib;
-            //}
-          }
-        }
-      }
+#if 0
+	// testing whether it is one SPR away
+	// Simply try to morph the current tree t
+    // Double loop: first try every subtree of the original
+    // then try to attach it to each of the original node
+    // note, we do not want to re-generate trees many times
+    // so we need to re-attach the detached subtrees each time we need
+    RBT treeOpt(*this);
 
-      if (fNodeDeleted == true) {
-        // give one more chance
-        fNodeDeleted = false;
-        continue;
-      }
 
-      if (treeOpt.NextPostorderTranvers(tr) == false) {
-        break;
-      }
-    }
-  }
-  // cout << "here2\n";
+    TraversRecord tr;
+    treeOpt.InitPostorderTranvers(tr);
+    while( true )
+    {
+        RBTNode *pCurNode = tr.pCurNode;
+//cout << "Outer loop pcurnode = " << (int)pCurNode << ", lvid = " << pCurNode->GetLeafId() << endl;
+        if( pCurNode->GetParent() == NULL  )
+        {
+            // do not do the whole tree to remove, that is not valid
+            break;
+        }
+
+        // remember the sibling so we can re-attach it at the end
+        RBTNode *pSib = pCurNode->GetParent()->GetLeftChild();
+        if( pSib == pCurNode )
+        {
+            pSib = pCurNode->GetParent()->GetRightChild();
+        }
+
+
+
+        // now detach the subtree
+        // need to handle the special case when the root is removed
+        if( pCurNode->GetParent()->GetParent() == NULL )
+        {
+            treeOpt.pRoot = pSib;
+        }
+        pCurNode->DetachSubtree();
+//set<int> clvs;
+//pCurNode->GetLeaves( clvs );
+//cout << "Current subtree has leafs = ";
+//DumpIntSet( clvs );
+//set<int> rlvs;
+//treeOpt.pRoot->GetLeaves( rlvs );
+//cout << "Remaing tree has leafs = ";
+//DumpIntSet( rlvs );
+//cout << "Current subtree = ";
+//treeOpt.Dump();
+
+        // now do another search
+        TraversRecord tr2;
+        treeOpt.InitPostorderTranvers( tr2 );
+        while(true)
+        {
+//set<int> rlvs3;
+//treeOpt.pRoot->GetLeaves( rlvs3 );
+//cout << "During inner loop start, tree has leafs = ";
+//DumpIntSet( rlvs3 );
+//cout << "During internal loop, subtree = ";
+//treeOpt.Dump();
+
+
+//cout << "Consider inner node = " << (int)tr2.pCurNode << ", leaf id = " << tr2.pCurNode->GetLeafId() << endl;
+            // try to re-attach to the node
+            RBTNode *pNewPar = pCurNode->AttachSubtree(tr2.pCurNode);
+            if( tr2.pCurNode == treeOpt.pRoot )
+            {
+                // we created a new root
+                treeOpt.pRoot = pNewPar;
+            }
+
+            // Test whether the morphed tree is the SAME as the other
+            if (treeOpt.IsSame( rbt ) == true  )
+			{
+				// find it
+				return true;
+//cout << "The SPR transformed subtree = ";
+//treeOpt.Dump();
+			}
+
+            // now we need to detach the node again
+            if( pCurNode->GetParent()->IsRoot() == true  )
+            {
+                // when root is removed, we have to re-adjust the root
+                treeOpt.pRoot = tr2.pCurNode;
+            }
+            pCurNode->DetachSubtree();
+
+            // move to next
+            if( treeOpt.NextPostorderTranvers(tr2) == false )
+            {
+                break;
+            }
+
+        }
+//cout << "Now attach the current subtree...\n";
+        // now re-attach the node
+        RBTNode *pnode = pCurNode->AttachSubtree( pSib );
+        if( treeOpt.pRoot == pSib )
+        {
+//cout << "readjust root ...\n";
+            // we need to update the root again
+            treeOpt.pRoot = pnode;
+        }
+//set<int> rlvs2;
+//treeOpt.pRoot->GetLeaves( rlvs2 );
+//cout << "After reattaching at the end of one round, tree has leafs = ";
+//DumpIntSet( rlvs2 );
+//cout << "After re-attaching the subtree = ";
+//treeOpt.Dump();
+
+        // move to next
+        if( treeOpt.NextPostorderTranvers(tr) == false )
+        {
+            break;
+        }
+    }
+
+	// did not find
+	return false;
+#endif
 }
 
-bool RBT ::ReconstructNewick(const string &strNewick) {
-  // for now, call internal
-  RBTNode *pRootNew = ReconstructNewickInternal(strNewick);
-  if (pRootNew == NULL) {
-    // fail to build
-    return false;
-  }
-  // update current node
-  if (this->pRoot != NULL) {
-    this->pRoot->Clear();
-    delete pRoot;
-  }
-  this->pRoot = pRootNew;
-  return true;
+// Reduce two trees in lock-step so that their common parts are removed and only the different parts
+// are left: whenever two sibling leaves of treeOpt also share a parent in treeCmp, the sibling leaf is
+// removed (and deleted) from both trees. Whole passes are repeated until a pass removes nothing.
+// Known issue (11/15/07): sometimes an invalid tree is passed in and causes problems; cause still TBD.
+void RBT ::Consolidate(RBT &treeOpt, RBT &treeCmp)
+{
+    //cout << "ENTERING consolidate....\n";
+    YW_ASSERT_INFO(treeOpt.GetNodesNum() == treeCmp.GetNodesNum(), "Tree must be the same");
+    // create a map (leaf id -> leaf node) for the cmp tree
+    map<int, RBTNode *> mapCmpTreeLeafNodes;
+    TraversRecord tr1;
+    treeCmp.InitPostorderTranvers(tr1);
+    while (true)
+    {
+        if (tr1.pCurNode->IsLeaf() == true)
+        {
+            mapCmpTreeLeafNodes.insert(map<int, RBTNode *>::value_type(tr1.pCurNode->GetLeafId(), tr1.pCurNode));
+        }
+        if (treeCmp.NextPostorderTranvers(tr1) == false)
+        {
+            break;
+        }
+    }
+
+    //cout << "here1\n";
+    // reduce the two trees so that no shared sibling-leaf pairs are left in them
+    // I do not understand why ONE-PATH does not work. Here just repeat until no nodes can be deleted
+    bool fNothingFound = false;
+    while (fNothingFound == false)
+    {
+        //cout << "Current trees = ";
+        //treeOpt.Dump();
+        //treeCmp.Dump();
+        fNothingFound = true;
+        TraversRecord tr;
+        treeOpt.InitPostorderTranvers(tr);
+        bool fNodeDeleted = false;
+        while (true)
+        {
+            //
+            if (tr.pCurNode->IsLeaf() == true)
+            {
+                //
+                //if( tr.pCurNode->IsLeftChild() == true )
+                // when we start to delete, we only look for left child for now
+                YW_ASSERT_INFO(tr.pCurNode->GetParent() != NULL, "Can not be like this");
+                RBTNode *psib = tr.pCurNode->GetSibling();
+                YW_ASSERT_INFO(psib != NULL, "Wrong1.1.0");
+                if (psib->IsLeaf() == true)
+                {
+                    // now try to get the corresponding leaves in the other tree
+                    RBTNode *pLeaf1Cmp = mapCmpTreeLeafNodes[tr.pCurNode->GetLeafId()];
+                    RBTNode *pLeaf2Cmp = mapCmpTreeLeafNodes[psib->GetLeafId()];
+                    if (pLeaf1Cmp == NULL)
+                    {
+                        //treeOpt.Dump();
+                        //treeCmp.Dump();
+                        cout << "This node has been delted: " << tr.pCurNode->GetLeafId() << endl;
+                    }
+                    if (pLeaf2Cmp == NULL)
+                    {
+                        //treeOpt.Dump();
+                        //treeCmp.Dump();
+                        cout << "This node has been delted: " << psib->GetLeafId() << endl;
+                    }
+                    // YW: for now, continue, need to fix it later. 11/15/07
+                    YW_ASSERT_INFO(pLeaf1Cmp != NULL && pLeaf2Cmp != NULL, "Wrong1.1.1");
+                    if (pLeaf1Cmp->GetParent() == pLeaf2Cmp->GetParent())
+                    {
+
+                        // Good, we found a shared pair: remove the sibling leaf from both trees
+                        fNodeDeleted = true;
+                        fNothingFound = false;
+                        int sibidCmp = psib->GetLeafId();
+                        pLeaf2Cmp->RemoveLeafSelf();
+                        delete pLeaf2Cmp;
+                        pLeaf2Cmp = NULL;
+                        mapCmpTreeLeafNodes[sibidCmp] = NULL;
+
+                        psib->RemoveLeafSelf();
+                        delete psib;
+                        psib = NULL;
+                        //cout << "Leaf " << sibidCmp << " is deleted\n";
+                        //if( tr.pCurNode->IsLeftChild() == false )
+                        //if( tr.pCurNode-> )
+                        //{
+                        // Update current to left child
+                        //
+                        //	tr.pCurNode = psib;
+                        //}
+                    }
+                }
+            }
+
+            if (fNodeDeleted == true)
+            {
+                // give one more chance: re-examine the same node without advancing the traversal
+                fNodeDeleted = false;
+                continue;
+            }
+
+            if (treeOpt.NextPostorderTranvers(tr) == false)
+            {
+                break;
+            }
+        }
+    }
+    //cout << "here2\n";
+}
+
+bool RBT ::ReconstructNewick(const string &strNewick)
+{
+    // for now, call internal
+    RBTNode *pRootNew = ReconstructNewickInternal(strNewick);
+    if (pRootNew == NULL)
+    {
+        // fail to build
+        return false;
+    }
+    // update current node
+    if (this->pRoot != NULL)
+    {
+        this->pRoot->Clear();
+        delete pRoot;
+    }
+    this->pRoot = pRootNew;
+    return true;
 }
 
-void RBT ::CollectTips() {
-  mapTipPtrs.clear();
+void RBT ::CollectTips()
+{
+    mapTipPtrs.clear();
 
-  //
-  TraversRecord tr;
-  InitPostorderTranvers(tr);
-  while (true) {
     //
-    if (tr.pCurNode->IsLeaf() == true) {
-      mapTipPtrs.insert(map<int, RBTNode *>::value_type(
-          tr.pCurNode->GetLeafId(), tr.pCurNode));
-    }
+    TraversRecord tr;
+    InitPostorderTranvers(tr);
+    while (true)
+    {
+        //
+        if (tr.pCurNode->IsLeaf() == true)
+        {
+            mapTipPtrs.insert(map<int, RBTNode *>::value_type(tr.pCurNode->GetLeafId(), tr.pCurNode));
+        }
 
-    // continue
-    if (NextPostorderTranvers(tr) == false) {
-      break;
+        // continue
+        if (NextPostorderTranvers(tr) == false)
+        {
+            break;
+        }
     }
-  }
 }
 
-RBTNode *RBT ::GetTip(int id) {
-  if (mapTipPtrs.find(id) != mapTipPtrs.end()) {
-    return mapTipPtrs[id];
-  } else {
-    return NULL;
-  }
+RBTNode *RBT ::GetTip(int id)
+{
+    if (mapTipPtrs.find(id) != mapTipPtrs.end())
+    {
+        return mapTipPtrs[id];
+    }
+    else
+    {
+        return NULL;
+    }
 }
 
-void RBT ::GetAllTips(vector<RBTNode *> &tips) {
-  for (map<int, RBTNode *>::iterator it = mapTipPtrs.begin();
-       it != mapTipPtrs.end(); ++it) {
-    tips.push_back(it->second);
-  }
+void RBT ::GetAllTips(vector<RBTNode *> &tips)
+{
+    for (map<int, RBTNode *>::iterator it = mapTipPtrs.begin(); it != mapTipPtrs.end(); ++it)
+    {
+        tips.push_back(it->second);
+    }
 }
 
-bool RBT ::AddLeaf(int pos) {
-  // make sure this is a good position
-  if (pos >= 2 * numLeaves - 1) {
-    // bad position
-    return false;
-  }
+bool RBT ::AddLeaf(int pos)
+{
+    // make sure this is a good position
+    if (pos >= 2 * numLeaves - 1)
+    {
+        // bad position
+        return false;
+    }
 
-  // now add to the leaf
-  InternalAddleaf(numLeaves, pos);
+    // now add to the leaf
+    InternalAddleaf(numLeaves, pos);
 
-  // inc num of leaves
-  numLeaves++;
+    // inc num of leaves
+    numLeaves++;
 
-  // clean up
-  mapSplitsInTree.clear();
-  this->tid = MapToId();
-  return true;
+    // clean up
+    mapSplitsInTree.clear();
+    this->tid = MapToId();
+    return true;
 }
 
 // compare
-int RBT ::Compare(RBT &rhs) {
-  // simply find how many splits are common in two trees
-  // collect two sets of splits
-  vector<set<int> > listSplitsRHS;
-  rhs.GetAllSplits(listSplitsRHS);
-  set<set<int> > setSplitsRHS;
-  for (int i = 0; i < (int)listSplitsRHS.size(); ++i) {
-    setSplitsRHS.insert(listSplitsRHS[i]);
-  }
-  vector<set<int> > listSplits;
-  this->GetAllSplits(listSplits);
-  int res = 0;
-  for (int i = 0; i < (int)listSplits.size(); ++i) {
-    if (setSplitsRHS.find(listSplits[i]) != setSplitsRHS.end()) {
-      // find oe shared
-      res++;
-    }
-  }
-  return res;
-}
-bool RBT ::IsSameUnrootedTree(RBT &rhs) {
-  // simply find how many splits are common in two trees
-  // collect two sets of splits
-  vector<set<int> > listSplitsRHS;
-  rhs.GetAllSplits(listSplitsRHS);
-  set<set<int> > setSplitsRHS;
-  for (int i = 0; i < (int)listSplitsRHS.size(); ++i) {
-    setSplitsRHS.insert(listSplitsRHS[i]);
-  }
-  vector<set<int> > listSplits;
-  this->GetAllSplits(listSplits);
-  for (int i = 0; i < (int)listSplits.size(); ++i) {
-    if (setSplitsRHS.find(listSplits[i]) == setSplitsRHS.end()) {
-      // find oe shared
-      return false;
-    }
-  }
-  return true;
+int RBT ::Compare(RBT &rhs)
+{
+    // simply find how many splits are common in two trees
+    // collect two sets of splits
+    vector<set<int>> listSplitsRHS;
+    rhs.GetAllSplits(listSplitsRHS);
+    set<set<int>> setSplitsRHS;
+    for (int i = 0; i < (int)listSplitsRHS.size(); ++i)
+    {
+        setSplitsRHS.insert(listSplitsRHS[i]);
+    }
+    vector<set<int>> listSplits;
+    this->GetAllSplits(listSplits);
+    int res = 0;
+    for (int i = 0; i < (int)listSplits.size(); ++i)
+    {
+        if (setSplitsRHS.find(listSplits[i]) != setSplitsRHS.end())
+        {
+            // found one shared split
+            res++;
+        }
+    }
+    return res;
+}
+bool RBT ::IsSameUnrootedTree(RBT &rhs)
+{
+    // check that every split of this tree is also present in rhs
+    // collect two sets of splits
+    vector<set<int>> listSplitsRHS;
+    rhs.GetAllSplits(listSplitsRHS);
+    set<set<int>> setSplitsRHS;
+    for (int i = 0; i < (int)listSplitsRHS.size(); ++i)
+    {
+        setSplitsRHS.insert(listSplitsRHS[i]);
+    }
+    vector<set<int>> listSplits;
+    this->GetAllSplits(listSplits);
+    for (int i = 0; i < (int)listSplits.size(); ++i)
+    {
+        if (setSplitsRHS.find(listSplits[i]) == setSplitsRHS.end())
+        {
+            // found a split that is not shared
+            return false;
+        }
+    }
+    return true;
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////
 
-RBTNode *RBT ::ReconstructNewickInternal(const string &strNewick) {
-  // Build RBT by a given Newick string
-  // NOTE: we assume the tree is in the form of (1,(2,3)) form
-  // THAT IS, WE DO NOT ALLOW PRECEEDING SYMBOLS
-  // return the constructed root node for the current substring
-  // define commonly used symbol in Newick
-  // const char cTerm = ';';
-
-  // this function builds recursively subtrees for this part of string
-  // First, is this string a leaf or not
-  if (strNewick[0] != '(') {
-    // Yes, this is a leaf
-    int nodeId;
-    sscanf(strNewick.c_str(), "%d", &nodeId);
-    // cout << "leaf id = " << nodeId << endl;
-
-    // the ID of ms is by convention, one larger (starting from 1)
-    // so decrement by one
-
-    RBTNode *pLeaf = new RBTNode(nodeId - 1);
-    return pLeaf;
-  } else {
-    // This is not a leaf
-    // so we create underlying level for it
-    // TreeNode *pInternal = new TreeNode( invId++  );
-    RBTNode *pLeftChild = NULL;
-    RBTNode *pRightChild = NULL;
-    int lastpos = 1;
-    int curpos = 0;
-    int parnet = 0; // (: +1, ) -1
-    while (true) {
-      // cout << "curpos = " << curpos << endl;
-
-      if (curpos >= (int)strNewick.size()) {
-        // we are done
-        break;
-      }
-
-      // keep balance
-      if (strNewick[curpos] == '(') {
-        parnet++;
-      } else if (strNewick[curpos] == ')') {
-        parnet--;
-
-        // when parnet = 0, we know we end
-        if (parnet == 0) {
-          // now adding the last piece
-          // create a new node
-          int strl = curpos - lastpos;
-          string subs = strNewick.substr(lastpos, strl);
-          //    cout << "last subs = " << subs << endl;
-          pLeftChild = ReconstructNewickInternal(subs);
-
-          // aslo update lastpos
-          lastpos = curpos + 1;
-        }
-
-      } else if (strNewick[curpos] == ',') {
-        // Yes, this is a sepeartor, but we only start to process it when the
-        // balance of parenetnis is right
-        if (parnet == 1) {
-          // create a new node
-          int strl = curpos - lastpos;
-          string subs = strNewick.substr(lastpos, strl);
-          //    cout << "subs = " << subs << endl;
-          pRightChild = ReconstructNewickInternal(subs);
-
-          // aslo update lastpos
-          lastpos = curpos + 1;
-        }
-      }
-
-      // now move to next pos
-      curpos++;
-    }
-
-    YW_ASSERT_INFO(pLeftChild != NULL && pRightChild != NULL, "Children wrong");
-    RBTNode *pInternal;
-    if (pLeftChild->GetMinLeaveId() < pRightChild->GetMinLeaveId()) {
-      pInternal = new RBTNode(pLeftChild, pRightChild);
-    } else {
-      pInternal = new RBTNode(pRightChild, pLeftChild);
-    }
-    return pInternal;
-  }
-
-  // reconstruct tree by the given Newick format
-  // int spos = 0;
-  // while( spos < (int) strNewick.size()   )
-  //{
-  //    if(  strNewick[spos] == cTerm  )
-  //    {
-  //        break;
-  //    }
-  //    // Skip things until we find the first (
-  //}
-}
-
-/////////////////////////////////////////////////////////////////////////////////////
-void RBT ::Init() {
-  pRoot = NULL;
-  tid = -1; // not initialized
-  numLeaves = 0;
-}
-
-void RBT ::ReconstructById(RBT_ID tid) {
-  // cout << "ReconstructById\n";
-  // first clear the old tree if any
-  if (pRoot != NULL) {
-    pRoot->Clear();
-    delete pRoot;
-    pRoot = NULL;
-  }
-
-  vector<int> leavesEdgeIndices(numLeaves);
-  leavesEdgeIndices[0] = 0;
-  leavesEdgeIndices[1] = 0;
-
-  // reconstruct the tree by its ID
-  // first restrive the edge ids
-  int idUse = tid;
-  for (int lv = numLeaves - 1; lv >= 2; --lv) {
-    int base = 2 * lv - 1;
-    int eid = idUse % base;
-    leavesEdgeIndices[lv] = eid;
-    idUse = idUse / base;
-  }
-  // create a tree with two leaves
-  RBTNode *pn0 = new RBTNode(0);
-  // cout << "pn0 = " << (int) pn0 << endl;
-  RBTNode *pn1 = new RBTNode(1);
-  // cout << "pn1 = " << (int) pn1 << endl;
-  RBTNode *prn = new RBTNode(pn0, pn1);
-  // cout << "prn = " << (int) prn << endl;
-  this->pRoot = prn;
-
-  // now start to insert nodes from the third leaf
-  for (int lv = 2; lv < numLeaves; ++lv) {
-    // cout << "lv = " << lv << ", in construction\n";
-    // make sure the index make sense
-    int eid = leavesEdgeIndices[lv];
-    YW_ASSERT_INFO(eid < 2 * lv - 1, "eid too large");
-
-    InternalAddleaf(lv, eid);
-
-    // cout << "eid = " << eid << endl;
-    /*
-            // travere the current tree, and stop at the index
-            TraversRecord tr;
-            InitPostorderTranvers(tr);
-            int cureid = 0;
-            while(true)
+RBTNode *RBT ::ReconstructNewickInternal(const string &strNewick)
+{
+    // Build RBT by a given Newick string
+    // NOTE: we assume the tree is in the form of (1,(2,3)) form
+    // THAT IS, WE DO NOT ALLOW PRECEDING SYMBOLS
+    // return the constructed root node for the current substring
+    // define commonly used symbol in Newick
+    //const char cTerm = ';';
+
+    // this function builds recursively subtrees for this part of string
+    // First, is this string a leaf or not
+    if (strNewick[0] != '(')
+    {
+        // Yes, this is a leaf
+        int nodeId;
+        sscanf(strNewick.c_str(), "%d", &nodeId);
+        //cout << "leaf id = " << nodeId << endl;
+
+        // the ID of ms is by convention, one larger (starting from 1)
+        // so decrement by one
+
+        RBTNode *pLeaf = new RBTNode(nodeId - 1);
+        return pLeaf;
+    }
+    else
+    {
+        // This is not a leaf
+        // so we create underlying level for it
+        //TreeNode *pInternal = new TreeNode( invId++  );
+        RBTNode *pLeftChild = NULL;
+        RBTNode *pRightChild = NULL;
+        int lastpos = 1;
+        int curpos = 0;
+        int parnet = 0; // (: +1, ) -1
+        while (true)
+        {
+            //cout << "curpos = " << curpos << endl;
+
+            if (curpos >= (int)strNewick.size())
             {
-                if( cureid == eid )
+                // we are done
+                break;
+            }
+
+            // keep balance
+            if (strNewick[curpos] == '(')
+            {
+                parnet++;
+            }
+            else if (strNewick[curpos] == ')')
+            {
+                parnet--;
+
+                // when parnet = 0, we know we end
+                if (parnet == 0)
                 {
-                    // find it!
-                    break;
+                    // now adding the last piece
+                    // create a new node
+                    int strl = curpos - lastpos;
+                    string subs = strNewick.substr(lastpos, strl);
+                    //    cout << "last subs = " << subs << endl;
+                    pLeftChild = ReconstructNewickInternal(subs);
+
+                    // also update lastpos
+                    lastpos = curpos + 1;
                 }
-                else
+            }
+            else if (strNewick[curpos] == ',')
+            {
+                // Yes, this is a separator, but we only start to process it when the
+                // balance of parentheses is right
+                if (parnet == 1)
                 {
-                    // continue
-                    NextPostorderTranvers(tr);
+                    // create a new node
+                    int strl = curpos - lastpos;
+                    string subs = strNewick.substr(lastpos, strl);
+                    //    cout << "subs = " << subs << endl;
+                    pRightChild = ReconstructNewickInternal(subs);
+
+                    // also update lastpos
+                    lastpos = curpos + 1;
                 }
+            }
 
-                if( cureid >= 2*lv-1 )
-                {
-                    // should not come here
-                    YW_ASSERT_INFO(false, "Should not be here");
-                    break;
-                }
-                // update
-                cureid ++;
+            // now move to next pos
+            curpos++;
+        }
+
+        YW_ASSERT_INFO(pLeftChild != NULL && pRightChild != NULL, "Children wrong");
+        RBTNode *pInternal;
+        if (pLeftChild->GetMinLeaveId() < pRightChild->GetMinLeaveId())
+        {
+            pInternal = new RBTNode(pLeftChild, pRightChild);
+        }
+        else
+        {
+            pInternal = new RBTNode(pRightChild, pLeftChild);
+        }
+        return pInternal;
+    }
+
+    // reconstruct tree by the given Newick format
+    //int spos = 0;
+    //while( spos < (int) strNewick.size()   )
+    //{
+    //    if(  strNewick[spos] == cTerm  )
+    //    {
+    //        break;
+    //    }
+    //    // Skip things until we find the first (
+    //}
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+void RBT ::Init()
+{
+    pRoot = NULL;
+    tid = -1; // not initialized
+    numLeaves = 0;
+}
+
+void RBT ::ReconstructById(RBT_ID tid)
+{
+    //cout << "ReconstructById\n";
+    // first clear the old tree if any
+    if (pRoot != NULL)
+    {
+        pRoot->Clear();
+        delete pRoot;
+        pRoot = NULL;
+    }
+
+    vector<int> leavesEdgeIndices(numLeaves);
+    leavesEdgeIndices[0] = 0;
+    leavesEdgeIndices[1] = 0;
+
+    // reconstruct the tree by its ID
+    // first retrieve the edge ids
+    int idUse = tid;
+    for (int lv = numLeaves - 1; lv >= 2; --lv)
+    {
+        int base = 2 * lv - 1;
+        int eid = idUse % base;
+        leavesEdgeIndices[lv] = eid;
+        idUse = idUse / base;
+    }
+    // create a tree with two leaves
+    RBTNode *pn0 = new RBTNode(0);
+    //cout << "pn0 = " << (int) pn0 << endl;
+    RBTNode *pn1 = new RBTNode(1);
+    //cout << "pn1 = " << (int) pn1 << endl;
+    RBTNode *prn = new RBTNode(pn0, pn1);
+    //cout << "prn = " << (int) prn << endl;
+    this->pRoot = prn;
+
+    // now start to insert nodes from the third leaf
+    for (int lv = 2; lv < numLeaves; ++lv)
+    {
+        //cout << "lv = " << lv << ", in construction\n";
+        // make sure the index makes sense
+        int eid = leavesEdgeIndices[lv];
+        YW_ASSERT_INFO(eid < 2 * lv - 1, "eid too large");
+
+        InternalAddleaf(lv, eid);
+
+        //cout << "eid = " << eid << endl;
+        /*
+        // travere the current tree, and stop at the index
+        TraversRecord tr;
+        InitPostorderTranvers(tr);
+        int cureid = 0;
+        while(true)
+        {
+            if( cureid == eid )
+            {
+                // find it!
+                break;
             }
-    //cout << "cureid = " << cureid << endl;
-            // now add this. Need to consider whether this is the root or not
-            if( tr.pCurNode == pRoot )
+            else
             {
-                RBTNode *pNewRoot = pRoot->AddSibling( lv );
-                this->pRoot = pNewRoot;
-    //cout << "Update root to " << (int)pNewRoot << endl;
+                // continue
+                NextPostorderTranvers(tr);
+            }
+
+            if( cureid >= 2*lv-1 )
+            {
+                // should not come here
+                YW_ASSERT_INFO(false, "Should not be here");
+                break;
+            }
+            // update
+            cureid ++;
+        }
+//cout << "cureid = " << cureid << endl;
+        // now add this. Need to consider whether this is the root or not
+        if( tr.pCurNode == pRoot )
+        {
+            RBTNode *pNewRoot = pRoot->AddSibling( lv );
+            this->pRoot = pNewRoot;
+//cout << "Update root to " << (int)pNewRoot << endl;
+        }
+        else
+        {
+            // then simply add it to parent's proper position
+            if( tr.pCurNode->IsLeftChild() == true )
+            {
+                tr.pCurNode->GetParent()->AddToLeftEdge(lv);
             }
             else
             {
-                // then simply add it to parent's proper position
-                if( tr.pCurNode->IsLeftChild() == true )
-                {
-                    tr.pCurNode->GetParent()->AddToLeftEdge(lv);
-                }
-                else
-                {
-                    tr.pCurNode->GetParent()->AddToRightEdge(lv);
-                }
+                tr.pCurNode->GetParent()->AddToRightEdge(lv);
             }
-            */
-  }
+        }
+        */
+    }
 
-  // before return, save the clusters
-  // TBD
-  // YW_ASSERT_INFO(false, "not implemented");
+    // before return, save the clusters
+    // TBD
+    //YW_ASSERT_INFO(false, "not implemented");
 }
 
 // handle insertion of a new leaf
 // note: we only allow SEQENTIALLY INSERTION OF LEAVES
-bool RBT ::InternalAddleaf(int lvid, int pos) {
-  // travere the current tree, and stop at the index
-  TraversRecord tr;
-  InitPostorderTranvers(tr);
-  int cureid = 0;
-  while (true) {
-    if (cureid == pos) {
-      // find it!
-      break;
-    } else {
-      // continue
-      NextPostorderTranvers(tr);
-    }
-
-    if (cureid >= 2 * lvid - 1) {
-      // should not come here
-      YW_ASSERT_INFO(false, "Should not be here2");
-      break;
-    }
-    // update
-    cureid++;
-  }
-  // cout << "cureid = " << cureid << endl;
-  // now add this. Need to consider whether this is the root or not
-  if (tr.pCurNode == pRoot) {
-    RBTNode *pNewRoot = pRoot->AddSibling(lvid);
-    this->pRoot = pNewRoot;
-    // cout << "Update root to " << (int)pNewRoot << endl;
-  } else {
-    // then simply add it to parent's proper position
-    if (tr.pCurNode->IsLeftChild() == true) {
-      tr.pCurNode->GetParent()->AddToLeftEdge(lvid);
-    } else {
-      tr.pCurNode->GetParent()->AddToRightEdge(lvid);
-    }
-  }
-  return true;
-}
-
-RBT_ID RBT ::MapToId() {
-  // The scheme needs to be carefully worked out
-  // We use the enumeration index of the leave as the id base
-  // That is, id = [id2, id3, id4, ..., idk]
-  // where idi indicates which edge we pick in the RBT when inserting leaf-i
-  // We need to choose a way to assign number to (partial-completed)-tree edges
-  // we do so by post-order traversal: an edge is assign the POT order to the
-  // corresponding node (as the one towards the leaves of the tree)
-
-  YW_ASSERT_INFO(numLeaves >= 3, "Too few leaves");
-  // map the tree to an ID
-  // we save a vector of indices, which indicates on which edge the split is
-  // from
-  vector<int> leavesEdgeIndices(numLeaves);
-  leavesEdgeIndices[0] = 0;
-  leavesEdgeIndices[1] = 0;
-
-  // reconstruct a new tree by copying
-  RBT treeNew(*this);
-  // cout << "Tree copied. \n";
-  // start from third leave
-  for (int lv = numLeaves - 1; lv >= 2; --lv) {
-    // cout << "lv = " << lv << endl;
-    // find out where is this leave
-    int ponid = -1;
-    RBTNode *pLeaf = treeNew.FindLeaf(lv, ponid);
-    YW_ASSERT_INFO(pLeaf != NULL, "Fail in getting a leaf");
-    // cout << "ponid = " << ponid << endl;
-    if (pLeaf->IsLeftChild() == true) {
-      // if LEFt child, then ponid is TRUE
-      // so no change here
-    } else {
-      // cout << "It is right child\n";
-      // if it is RIGHT child, then in the original insert,
-      // it is put at ponid-1 edge
-      ponid--;
-    }
-    // remmeber this ponid
-    leavesEdgeIndices[lv] = ponid;
-    // remove this lv
-    // here is not very robust, but since we are not deleting the whole thing
-    // so it should be OK
-    // cout << "leaf id = " << pLeaf->GetLeafId() << endl;
-
-    // update root
-    if (pLeaf->GetParent() != NULL && pLeaf->GetParent()->GetParent() == NULL) {
-      // cout << "UPdate root\n";
-      // in this case, update pRoot
-      if (pLeaf->IsLeftChild() == true) {
-        treeNew.pRoot = pLeaf->GetParent()->GetRightChild();
-      } else {
-        // cout << "Get left child\n";
-        treeNew.pRoot = pLeaf->GetParent()->GetLeftChild();
-        // cout << "Number of remaining leafs = " <<
-        // treeNew.pRoot->GetNumLeavesUnder() << endl; cout << "pRoot = " <<
-        // (int) treeNew.pRoot << endl;
-      }
-    }
-
-    pLeaf->RemoveLeafSelf();
-    // cout << "After removing self\n";
-    delete pLeaf;
-    pLeaf = NULL;
-    // cout << "here0\n";
-  }
-  // cout << "here\n";
-  // cout << "Edge ids = ";
-  // DumpIntVec( leavesEdgeIndices );
-  // now we have the id we want as follows
-  int res = 0;
-  for (int lv = 2; lv < numLeaves; ++lv) {
-    int base = 2 * lv - 1;
-    res = res * base + leavesEdgeIndices[lv];
-  }
-  // cout << "res = " << res << endl;
-  return res;
-}
-
-bool RBT ::RemoveLeaf(int lvid) {
-  // first find the leaf
-  int dummy;
-  RBTNode *plf = pRoot->FindLeaf(lvid, dummy);
-  if (plf == NULL) {
-    // can not find the leaf
-    return false;
-  }
-
-  // caution: if the leave is dirctly under root. then we have to change ROOT!
-  if (plf->GetParent() == this->pRoot) {
-    // set root to the sibling
-    this->pRoot = plf->GetSibling();
-    YW_ASSERT_INFO(this->pRoot != NULL, "Wrong: root becomes bad!");
-  }
-
-  plf->RemoveLeafSelf();
-  // delete plf;
-  plf = NULL;
-  return true;
-}
-
-bool RBT ::IsSame(const RBT &tr) const {
-  // test two trees are equivalent or not
-  string trs = tr.GetNewick();
-  string s0 = GetNewick();
-  // when the leaf are ordered in a specific way,
-  // two RBTs are the same iff Newick string is the same
-  return trs == s0;
-}
-string RBT ::GetNewick() const {
-  YW_ASSERT_INFO(pRoot != NULL, "Fail");
-  return pRoot->GetNewick();
-}
-
-void RBT ::PruneLargeIdNodes(int idThres) {
-  // get rid of id that is too large. possibly due to ARG issue
-  // simply do an iteration
-  TraversRecord tr;
-  InitPostorderTranvers(tr);
-  while (true) {
-    //
-    if (tr.pCurNode->IsLeaf() == true) {
-      if (tr.pCurNode->GetLeafId() >= idThres) {
-        // update current node
-        RBTNode *pn = tr.pCurNode;
-        RBTNode *pParNodeRem = pn->GetParent(); // the parent node is also gone
-        // remove it
-        NextPostorderTranvers(tr);
-        if (tr.pCurNode == pParNodeRem) {
-          NextPostorderTranvers(tr);
+bool RBT ::InternalAddleaf(int lvid, int pos)
+{
+    // traverse the current tree, and stop at the index
+    TraversRecord tr;
+    InitPostorderTranvers(tr);
+    int cureid = 0;
+    while (true)
+    {
+        if (cureid == pos)
+        {
+            // find it!
+            break;
+        }
+        else
+        {
+            // continue
+            NextPostorderTranvers(tr);
+        }
+
+        if (cureid >= 2 * lvid - 1)
+        {
+            // should not come here
+            YW_ASSERT_INFO(false, "Should not be here2");
+            break;
+        }
+        // update
+        cureid++;
+    }
+    //cout << "cureid = " << cureid << endl;
+    // now add this. Need to consider whether this is the root or not
+    if (tr.pCurNode == pRoot)
+    {
+        RBTNode *pNewRoot = pRoot->AddSibling(lvid);
+        this->pRoot = pNewRoot;
+        //cout << "Update root to " << (int)pNewRoot << endl;
+    }
+    else
+    {
+        // then simply add it to parent's proper position
+        if (tr.pCurNode->IsLeftChild() == true)
+        {
+            tr.pCurNode->GetParent()->AddToLeftEdge(lvid);
+        }
+        else
+        {
+            tr.pCurNode->GetParent()->AddToRightEdge(lvid);
         }
-        // cout << "Node extra removed: "  << pn->GetLeafId() << endl;
-        pn->RemoveLeafSelf();
-        // delete pn;
-        pn = NULL;
-        continue;
-      }
+    }
+    return true;
+}
+
+RBT_ID RBT ::MapToId()
+{
+    // The scheme needs to be carefully worked out
+    // We use the enumeration index of each leaf as the id base
+    // That is, id = [id2, id3, id4, ..., idk]
+    // where idi indicates which edge we pick in the RBT when inserting leaf-i
+    // We need to choose a way to assign number to (partial-completed)-tree edges
+    // we do so by post-order traversal: an edge is assigned the POT order of the corresponding
+    // node (the one towards the leaves of the tree)
+
+    YW_ASSERT_INFO(numLeaves >= 3, "Too few leaves");
+    // map the tree to an ID
+    // we save a vector of indices, which indicates on which edge the split is from
+    vector<int> leavesEdgeIndices(numLeaves);
+    leavesEdgeIndices[0] = 0;
+    leavesEdgeIndices[1] = 0;
+
+    // reconstruct a new tree by copying
+    RBT treeNew(*this);
+    //cout << "Tree copied. \n";
+    // start from the last leaf and work back down to the third leaf
+    for (int lv = numLeaves - 1; lv >= 2; --lv)
+    {
+        //cout << "lv = " << lv << endl;
+        // find out where this leaf is
+        int ponid = -1;
+        RBTNode *pLeaf = treeNew.FindLeaf(lv, ponid);
+        YW_ASSERT_INFO(pLeaf != NULL, "Fail in getting a leaf");
+        //cout << "ponid = " << ponid << endl;
+        if (pLeaf->IsLeftChild() == true)
+        {
+            // if it is a LEFT child, then ponid is already correct
+            // so no change here
+        }
+        else
+        {
+            //cout << "It is right child\n";
+            // if it is RIGHT child, then in the original insert,
+            // it is put at ponid-1 edge
+            ponid--;
+        }
+        // remember this ponid
+        leavesEdgeIndices[lv] = ponid;
+        // remove this lv
+        // here is not very robust, but since we are not deleting the whole thing
+        // so it should be OK
+        //cout << "leaf id = " << pLeaf->GetLeafId() << endl;
+
+        // update root
+        if (pLeaf->GetParent() != NULL && pLeaf->GetParent()->GetParent() == NULL)
+        {
+            //cout << "UPdate root\n";
+            // in this case, update pRoot
+            if (pLeaf->IsLeftChild() == true)
+            {
+                treeNew.pRoot = pLeaf->GetParent()->GetRightChild();
+            }
+            else
+            {
+                //cout << "Get left child\n";
+                treeNew.pRoot = pLeaf->GetParent()->GetLeftChild();
+                //cout << "Number of remaining leafs = " << treeNew.pRoot->GetNumLeavesUnder() << endl;
+                //cout << "pRoot = " << (int) treeNew.pRoot << endl;
+            }
+        }
+
+        pLeaf->RemoveLeafSelf();
+        //cout << "After removing self\n";
+        delete pLeaf;
+        pLeaf = NULL;
+        //cout << "here0\n";
+    }
+    //cout << "here\n";
+    //cout << "Edge ids = ";
+    //DumpIntVec( leavesEdgeIndices );
+    // now we have the id we want as follows
+    int res = 0;
+    for (int lv = 2; lv < numLeaves; ++lv)
+    {
+        int base = 2 * lv - 1;
+        res = res * base + leavesEdgeIndices[lv];
+    }
+    //cout << "res = " << res << endl;
+    return res;
+}
+
+bool RBT ::RemoveLeaf(int lvid)
+{
+    // first find the leaf
+    int dummy;
+    RBTNode *plf = pRoot->FindLeaf(lvid, dummy);
+    if (plf == NULL)
+    {
+        // can not find the leaf
+        return false;
     }
 
-    // continue
-    if (NextPostorderTranvers(tr) == false) {
-      break;
+    // caution: if the leaf is directly under the root, then we have to change ROOT!
+    if (plf->GetParent() == this->pRoot)
+    {
+        // set root to the sibling
+        this->pRoot = plf->GetSibling();
+        YW_ASSERT_INFO(this->pRoot != NULL, "Wrong: root becomes bad!");
+    }
+
+    plf->RemoveLeafSelf();
+    //delete plf;
+    plf = NULL;
+    return true;
+}
+
+bool RBT ::IsSame(const RBT &tr) const
+{
+    // test two trees are equivalent or not
+    string trs = tr.GetNewick();
+    string s0 = GetNewick();
+    // when the leaf are ordered in a specific way,
+    // two RBTs are the same iff Newick string is the same
+    return trs == s0;
+}
+string RBT ::GetNewick() const
+{
+    YW_ASSERT_INFO(pRoot != NULL, "Fail");
+    return pRoot->GetNewick();
+}
+
+void RBT ::PruneLargeIdNodes(int idThres)
+{
+    // get rid of id that is too large. possibly due to ARG issue
+    // simply do an iteration
+    TraversRecord tr;
+    InitPostorderTranvers(tr);
+    while (true)
+    {
+        //
+        if (tr.pCurNode->IsLeaf() == true)
+        {
+            if (tr.pCurNode->GetLeafId() >= idThres)
+            {
+                // update current node
+                RBTNode *pn = tr.pCurNode;
+                RBTNode *pParNodeRem = pn->GetParent(); // the parent node is also gone
+                // remove it
+                NextPostorderTranvers(tr);
+                if (tr.pCurNode == pParNodeRem)
+                {
+                    NextPostorderTranvers(tr);
+                }
+                //cout << "Node extra removed: "  << pn->GetLeafId() << endl;
+                pn->RemoveLeafSelf();
+                //delete pn;
+                pn = NULL;
+                continue;
+            }
+        }
+
+        // continue
+        if (NextPostorderTranvers(tr) == false)
+        {
+            break;
+        }
     }
-  }
 }
 
 //////////////////////////////////////////////////////////////////////////////////
 
-bool RBT ::InitPostorderTranvers(TraversRecord &tr) {
-  YW_ASSERT_INFO(pRoot != NULL, "Tree not initialized");
+bool RBT ::InitPostorderTranvers(TraversRecord &tr)
+{
+    YW_ASSERT_INFO(pRoot != NULL, "Tree not initialized");
 
-  // move down to the left-most leave (should be 0, verify it)
-  RBTNode *pcur = this->pRoot->GetLeftMostChild();
-  //    YW_ASSERT_INFO( pcur->GetLeafId() == 0, "The leftmost leaf must be 0" );
-  tr.pCurNode = pcur;
-  return true;
+    // move down to the left-most leaf (should be 0, verify it)
+    RBTNode *pcur = this->pRoot->GetLeftMostChild();
+    //    YW_ASSERT_INFO( pcur->GetLeafId() == 0, "The leftmost leaf must be 0" );
+    tr.pCurNode = pcur;
+    return true;
 }
 
-bool RBT ::NextPostorderTranvers(TraversRecord &tr) {
-  // if we are at the root, we are done
-  RBTNode *pCur = tr.pCurNode;
-  if (pCur->GetParent() == NULL) {
-    return false;
-  }
-
-  // if this is the left child, now move to right
-  if (pCur->IsLeftChild() == true) {
-    // start still from the left leaf
-    tr.pCurNode = pCur->GetParent()->GetRightChild()->GetLeftMostChild();
-  } else {
-    // if it is right child, move up
-    tr.pCurNode = pCur->GetParent();
-  }
-  return true;
-}
-
-void RBT ::RetrieveSplits() {
-  // find and store all splits
-  // we do this by retrieving splits in it
-  // note we only store one side of splits, which contains 0
-  TraversRecord tr;
-  InitPostorderTranvers(tr);
-  while (true) {
-    set<int> lvs;
-    tr.pCurNode->GetLeaves(lvs);
-    if (lvs.find(0) != lvs.end()) {
-      // save it
-      if ((int)lvs.size() < this->numLeaves) {
-        mapSplitsInTree.insert(map<set<int>, bool>::value_type(lvs, true));
-      }
-    } else {
-      // store its complement
-      set<int> compls;
-      PopulateSetWithInterval(compls, 0, numLeaves - 1);
-      SubtractSets(compls, lvs);
-      if ((int)lvs.size() < this->numLeaves) {
-        mapSplitsInTree.insert(map<set<int>, bool>::value_type(compls, true));
-      }
-    }
-
-    // move to the next
-    if (NextPostorderTranvers(tr) == false) {
-      break;
-    }
-  }
-}
-
-RBTNode *RBT ::FindLeaf(int lvidParm, int &ponid) {
-  // cout << "FindLeaf: lvidParm = " << lvidParm << endl;
-  // just delegate to the root
-  return this->pRoot->FindLeaf(lvidParm, ponid);
-}
-
-void RBT ::GetLeaves(set<int> &lvs) { pRoot->GetLeaves(lvs); }
-
-void RBT ::Dump() const {
-  pRoot->Dump();
-  cout << endl;
-}
-
-void RBT ::DeleteLeaves(set<int> &lvids) {
-  // delete leaves designated
-  // here is a DUMB method: remove one by one
-  // SLOW! but maybe enough for now. TBD
-  for (set<int>::iterator it = lvids.begin(); it != lvids.end(); ++it) {
-    int id = *it;
-    // int dummy;
-    // find the leave
-    // RBTNode *tnode = FindLeaf( id, dummy);
-    // if( tnode == NULL )
-    //{
-    //	cout << "Warning: leave id = " << id << " is not in the tree.\n";
-    //	continue;
-    //}
-    // remove it
-    if (RemoveLeaf(id) == false) {
-      cout << "Warning: leave id = " << id << " is not in the tree.\n";
-    }
-    // cout << "After deleting leave = " << id << ", tree becomes: ";
-    // Dump();
-  }
-}
-
-void RBT ::RealignLeaves() {
-  // cout << "RealignLeaves: tree = ";
-  // Dump();
-  // sometimes, say after leave is deleted, leaves are no longer contiguous,
-  // this op sets it back to contiguous get all livids first
-  set<int> lvids;
-  GetLeaves(lvids);
-  // convert to a lookup map
-  map<int, int> mapLvidToRank;
-  int rank = 0;
-  for (set<int>::iterator it = lvids.begin(); it != lvids.end(); ++it) {
-    mapLvidToRank.insert(map<int, int>::value_type(*it, rank++));
-  }
-
-  // now traversal the tree and do a traversal and reset leaf ids
-  TraversRecord tr1;
-  InitPostorderTranvers(tr1);
-  while (true) {
-    if (tr1.pCurNode->IsLeaf() == true) {
-      int id = tr1.pCurNode->GetLeafId();
-      YW_ASSERT_INFO(mapLvidToRank.find(id) != mapLvidToRank.end(),
-                     "Leaf must be present");
-      tr1.pCurNode->SetLeafId(mapLvidToRank[id]);
-    }
-    if (NextPostorderTranvers(tr1) == false) {
-      break;
-    }
-  }
-
-  // cout << "RealignLeaves: after realign, tree = ";
-  // Dump();
-
-  // also here also readjust the number of leaves
-  this->numLeaves = lvids.size();
-}
-
-///////////////////////////////////////////////////////////////////////////////
+bool RBT ::NextPostorderTranvers(TraversRecord &tr)
+{
+    // if we are at the root, we are done
+    RBTNode *pCur = tr.pCurNode;
+    if (pCur->GetParent() == NULL)
+    {
+        return false;
+    }
+
+    // if this is the left child, now move to right
+    if (pCur->IsLeftChild() == true)
+    {
+        // start still from the left leaf
+        tr.pCurNode = pCur->GetParent()->GetRightChild()->GetLeftMostChild();
+    }
+    else
+    {
+        // if it is right child, move up
+        tr.pCurNode = pCur->GetParent();
+    }
+    return true;
+}
+
+void RBT ::RetrieveSplits()
+{
+    // find and store all splits
+    // we do this by retrieving splits in it
+    // note we only store one side of splits, which contains 0
+    TraversRecord tr;
+    InitPostorderTranvers(tr);
+    while (true)
+    {
+        set<int> lvs;
+        tr.pCurNode->GetLeaves(lvs);
+        if (lvs.find(0) != lvs.end())
+        {
+            // save it
+            if ((int)lvs.size() < this->numLeaves)
+            {
+                mapSplitsInTree.insert(map<set<int>, bool>::value_type(lvs, true));
+            }
+        }
+        else
+        {
+            // store its complement
+            set<int> compls;
+            PopulateSetWithInterval(compls, 0, numLeaves - 1);
+            SubtractSets(compls, lvs);
+            if ((int)lvs.size() < this->numLeaves)
+            {
+                mapSplitsInTree.insert(map<set<int>, bool>::value_type(compls, true));
+            }
+        }
+
+        // move to the next
+        if (NextPostorderTranvers(tr) == false)
+        {
+            break;
+        }
+    }
+}
+
+RBTNode *RBT ::FindLeaf(int lvidParm, int &ponid)
+{
+    //cout << "FindLeaf: lvidParm = " << lvidParm << endl;
+    // just delegate to the root
+    return this->pRoot->FindLeaf(lvidParm, ponid);
+}
+
+void RBT ::GetLeaves(set<int> &lvs)
+{
+    pRoot->GetLeaves(lvs);
+}
+
+void RBT ::Dump() const
+{
+    pRoot->Dump();
+    cout << endl;
+}
+
+void RBT ::DeleteLeaves(set<int> &lvids)
+{
+    // delete leaves designated
+    // here is a DUMB method: remove one by one
+    // SLOW! but maybe enough for now. TBD
+    for (set<int>::iterator it = lvids.begin(); it != lvids.end(); ++it)
+    {
+        int id = *it;
+        //int dummy;
+        // find the leave
+        //RBTNode *tnode = FindLeaf( id, dummy);
+        //if( tnode == NULL )
+        //{
+        //	cout << "Warning: leave id = " << id << " is not in the tree.\n";
+        //	continue;
+        //}
+        // remove it
+        if (RemoveLeaf(id) == false)
+        {
+            cout << "Warning: leave id = " << id << " is not in the tree.\n";
+        }
+        //cout << "After deleting leave = " << id << ", tree becomes: ";
+        //Dump();
+    }
+}
+
+void RBT ::RealignLeaves()
+{
+    //cout << "RealignLeaves: tree = ";
+    //Dump();
+    // sometimes, e.g. after a leaf is deleted, leaf ids are no longer contiguous; this op makes them contiguous again
+    // get all lvids first
+    set<int> lvids;
+    GetLeaves(lvids);
+    // convert to a lookup map
+    map<int, int> mapLvidToRank;
+    int rank = 0;
+    for (set<int>::iterator it = lvids.begin(); it != lvids.end(); ++it)
+    {
+        mapLvidToRank.insert(map<int, int>::value_type(*it, rank++));
+    }
+
+    // now traverse the tree and reset the leaf ids
+    TraversRecord tr1;
+    InitPostorderTranvers(tr1);
+    while (true)
+    {
+        if (tr1.pCurNode->IsLeaf() == true)
+        {
+            int id = tr1.pCurNode->GetLeafId();
+            YW_ASSERT_INFO(mapLvidToRank.find(id) != mapLvidToRank.end(), "Leaf must be present");
+            tr1.pCurNode->SetLeafId(mapLvidToRank[id]);
+        }
+        if (NextPostorderTranvers(tr1) == false)
+        {
+            break;
+        }
+    }
+
+    //cout << "RealignLeaves: after realign, tree = ";
+    //Dump();
+
+    // also readjust the number of leaves here
+    this->numLeaves = lvids.size();
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Other type of reconstruction
 
-bool RBT ::ReconstructByPlainDesc(const vector<int> &listNodeLabels,
-                                  const vector<int> &listParentNodePos,
-                                  const vector<double> &listEdgeDist) {
-  YW_ASSERT_INFO(listNodeLabels.size() >= 3, "Too small a tree");
-
-  // first step is to get, for each tree node, what are the two children
-  // this helps us in reconstructing the RBT tree. NOTE, we only deal with
-  // non-leaves
-  int numTNodes = listNodeLabels.size();
-  vector<int> listNodeLeftChild, listNodeRightChild;
-  for (int i = 0; i < numTNodes; ++i) {
-    // start with -1 to indicate they are not set
-    listNodeLeftChild.push_back(-1);
-    listNodeRightChild.push_back(-1);
-  }
-  for (int i = 0; i < numTNodes; ++i) {
-    int ppos = listParentNodePos[i];
-    // cout << "i = " << i << ", posi = " << ppos << endl;
-    if (ppos < 0) {
-      // must reach the root
-      break;
-    }
-
-    if (listNodeLeftChild[ppos] < 0) {
-      // save it
-      listNodeLeftChild[ppos] = i;
-    } else if (listNodeRightChild[ppos] < 0) {
-      listNodeRightChild[ppos] = i;
-    } else {
-      YW_ASSERT_INFO(
-          false,
-          "The tree is not binary. We can only handle binary for now.\n");
-    }
-  }
-
-  // cout << "Here..\n";
-  // first clear the old tree if any
-  if (pRoot != NULL) {
-    pRoot->Clear();
-    delete pRoot;
-    pRoot = NULL;
-  }
-  // cout << "Here...\n";
-  // now do for every node
-  vector<RBTNode *> listRBTNodes;
-  for (int i = 0; i < numTNodes; ++i) {
-    YW_ASSERT_INFO((listNodeLeftChild[i] >= 0 && listNodeRightChild[i] >= 0) ||
-                       (listNodeLeftChild[i] < 0 && listNodeRightChild[i] < 0),
-                   "WRONG");
-    // cout << "Adding node-" << i << endl;
-    // if it is leaf
-    if (listNodeLeftChild[i] < 0) {
-      // cout << "A leaf\n";
-      //
-      RBTNode *pn0 = new RBTNode(i);
-
-      // also set height to be 1.0
-      pn0->SetHeight(1.0);
-
-      //
-      listRBTNodes.push_back(pn0);
-    } else {
-      // cout << "Not a leaf\n";
-      // not leaves
-      int pnLeftInd = listNodeLeftChild[i];
-      YW_ASSERT_INFO(pnLeftInd < numTNodes, "Tree node indices wrong");
-      int pnRightInd = listNodeRightChild[i];
-      YW_ASSERT_INFO(pnRightInd < numTNodes, "Tree node indices wrong");
-      RBTNode *pn0 = listRBTNodes[pnLeftInd];
-      RBTNode *pn1 = listRBTNodes[pnRightInd];
-      RBTNode *prn;
-      if (pn0->GetMinLeaveId() < pn1->GetMinLeaveId()) {
-        prn = new RBTNode(pn0, pn1);
-      } else {
-        prn = new RBTNode(pn1, pn0);
-      }
-
-      // height is set by LEFT node (this may cause problem when the right is
-      // NOT consistent for now, we IGNORE this hazard. NOTE, the leaf is
-      // LOWEST, so we decrease
-      double ht = pn0->GetHeight() - listEdgeDist[pnLeftInd];
-      prn->SetHeight(ht);
-
-      //
-      listRBTNodes.push_back(prn);
-    }
-  }
-
-  // set root
-  int numNodesInList = listRBTNodes.size();
-  YW_ASSERT_INFO(numNodesInList == numTNodes,
-                 "Wrong in ReconstructByPlainDesc");
-  this->pRoot = listRBTNodes[numNodesInList - 1];
-
-  return true;
-}
-
-void RBT ::RetrievePlainDesc(int &numLvs, vector<int> &listNodeLabels,
-                             vector<int> &listParentNodePos,
-                             vector<double> &listEdgeDist) {
-  numLvs = this->numLeaves;
-  // init the return params
-  listNodeLabels.clear();
-  listParentNodePos.clear();
-  listEdgeDist.clear();
-  for (int i = 0; i < GetNodesNum(); ++i) {
-    if (i < numLeaves) {
-      listNodeLabels.push_back(i);
-    } else {
-      listNodeLabels.push_back(-1);
-    }
-    listParentNodePos.push_back(-1);
-    listEdgeDist.push_back(-1.0);
-  }
-
-  // form a list of tree nodes
-  // start iteration. Maintain TWO lists: one for leaves and one for internals
-  vector<RBTNode *> listLeafNodes;
-  // leaves are fixed
-  listLeafNodes.resize(this->numLeaves);
-
-  vector<RBTNode *> listInternalNodes;
-  // use a map to quickly find location: well, not very elegent, but QUICK way
-  // to do something
-  map<RBTNode *, int> mapNodeToIndices;
-  TraversRecord tr;
-  InitPostorderTranvers(tr);
-  while (true) {
-    RBTNode *pcnode = tr.pCurNode;
-    if (pcnode->IsLeaf() == true) {
-      int lvid = pcnode->GetLeafId();
-      YW_ASSERT_INFO(lvid >= 0 && lvid < this->numLeaves, "Fail in lvid");
-      listLeafNodes[lvid] = pcnode;
-      // save this node in the COMBINED index
-      mapNodeToIndices.insert(map<RBTNode *, int>::value_type(pcnode, lvid));
-    } else {
-      listInternalNodes.push_back(pcnode);
-      // save this node
-      int ppos = listInternalNodes.size() - 1 + numLeaves;
-      mapNodeToIndices.insert(map<RBTNode *, int>::value_type(pcnode, ppos));
-
-      // also update input for the two children
-      RBTNode *plc = pcnode->GetLeftChild();
-      RBTNode *prc = pcnode->GetRightChild();
-      YW_ASSERT_INFO(mapNodeToIndices.find(plc) != mapNodeToIndices.end(),
-                     "WRONG");
-      YW_ASSERT_INFO(mapNodeToIndices.find(prc) != mapNodeToIndices.end(),
-                     "WRONG");
-      int plcind = mapNodeToIndices[plc];
-      int prcind = mapNodeToIndices[prc];
-      // cout << "set left child node " << plcind << " to " << ppos << endl;
-      // cout << "set right child node " << prcind << " to " << ppos << endl;
-      listParentNodePos[plcind] = ppos;
-      listParentNodePos[prcind] = ppos;
-
-      // set edge length too
-      double htPar = pcnode->GetHeight();
-      double plcHt = plc->GetHeight();
-      double prcHt = prc->GetHeight();
-      if (htPar < 0 || plcHt < 0 || prcHt < 0 || plcHt < htPar ||
-          prcHt < htPar) {
-        // NOT VERY GOOD. TBD. 100707
-        // set some arbitary number
-        listEdgeDist[plcind] = 0.0;
-        listEdgeDist[prcind] = 0.0;
-      } else {
-        // YW_ASSERT_INFO(htPar >= 0.0, "Height not set.");
-        YW_ASSERT_INFO(plcHt >= 0.0 && prcHt >= 0.0, "Height not set.");
-        YW_ASSERT_INFO(plcHt >= htPar && prcHt >= htPar, "Height not set.");
-        listEdgeDist[plcind] = plcHt - htPar;
-        listEdgeDist[prcind] = prcHt - htPar;
-      }
+bool RBT ::ReconstructByPlainDesc(const vector<int> &listNodeLabels, const vector<int> &listParentNodePos,
+                                  const vector<double> &listEdgeDist)
+{
+    YW_ASSERT_INFO(listNodeLabels.size() >= 3, "Too small a tree");
+
+    // first step is to get, for each tree node, what are the two children
+    // this helps us in reconstructing the RBT tree. NOTE, we only deal with non-leaves
+    int numTNodes = listNodeLabels.size();
+    vector<int> listNodeLeftChild, listNodeRightChild;
+    for (int i = 0; i < numTNodes; ++i)
+    {
+        // start with -1 to indicate they are not set
+        listNodeLeftChild.push_back(-1);
+        listNodeRightChild.push_back(-1);
+    }
+    for (int i = 0; i < numTNodes; ++i)
+    {
+        int ppos = listParentNodePos[i];
+        //cout << "i = " << i << ", posi = " << ppos << endl;
+        if (ppos < 0)
+        {
+            // must reach the root
+            break;
+        }
+
+        if (listNodeLeftChild[ppos] < 0)
+        {
+            // save it
+            listNodeLeftChild[ppos] = i;
+        }
+        else if (listNodeRightChild[ppos] < 0)
+        {
+            listNodeRightChild[ppos] = i;
+        }
+        else
+        {
+            YW_ASSERT_INFO(false, "The tree is not binary. We can only handle binary for now.\n");
+        }
     }
 
-    //
-    if (NextPostorderTranvers(tr) == false) {
-      break;
-    }
-  }
-
-  // make sure everything is correct
-  for (int i = 0; i < GetNodesNum() - 1; ++i) {
-    YW_ASSERT_INFO(listParentNodePos[i] >= 0 && listEdgeDist[i] >= 0,
-                   "Some nodes are not correctly set.");
-  }
-}
-
-void RBT ::AugamentDupRows(const vector<REMOVED_ROWS_INFO> &rmLvsStage) {
-  // cout << "Before row augament, tree = ";
-  // Dump();
-
-  // ASSUMPTION: the tree is currently labeled from 0 - numLeaves-1
-  // restore the leaves removed during matrix preprocessing
-  // the list of items is taken out in a step-by-step procedure
-  // IMPORTANT: need to reverse, since we start from the removed items
-  for (int i = (int)rmLvsStage.size() - 1; i >= 0; --i) {
-    // first reset the ids of the leaves
-    // this is how the new ids will be
-    int curLvNum = this->numLeaves;
-    // int numRemoved = rmLvsStage[i].rowsRemoved.size();
-    vector<int> vecRemRows;
-    PopulateVecBySet(vecRemRows, rmLvsStage[i].rowsRemoved);
-    vector<int> listOrigLeaveIds;
-    GetOrigPositionAfterRemoval(curLvNum, vecRemRows, listOrigLeaveIds);
-    // reconfig the leaves
-    // cout << "Now setting leaves during tree augamentation...\n";
-    SetLvids(listOrigLeaveIds);
-
-    // then insert all the deleted rows back in
-    // but first collect tips
-    CollectTips();
-    // now try to put back the removed rows
-    for (int j = 0; j < (int)rmLvsStage[i].pairsRmKeepRows.size(); ++j) {
-      //
-      int rowNew = rmLvsStage[i].pairsRmKeepRows[j].first;
-      YW_ASSERT_INFO(GetTip(rowNew) == NULL,
-                     "Tip is already in"); // should not be already in
-      int existId = rmLvsStage[i].pairsRmKeepRows[j].second;
-      // cout << "existId = " << existId << ", rowNew = " << rowNew << endl;
-      RBTNode *pn = GetTip(existId);
-      YW_ASSERT_INFO(pn != NULL, "Src node not found");
-      pn->AddSiblingToLeaf(rowNew);
-      // cout << "After adding back " << rowNew << " the tree is: ";
-      // Dump();
-    }
-    // update the number of leaves
-    YW_ASSERT_INFO(rmLvsStage[i].pairsRmKeepRows.size() ==
-                       rmLvsStage[i].rowsRemoved.size(),
-                   "Removed record mismatch.");
-    this->numLeaves += rmLvsStage[i].pairsRmKeepRows.size();
-  }
-  // cout << "After row augament, tree = ";
-  // Dump();
-}
-
-void RBT ::SetLvids(const vector<int> &mapLvids) {
-  // configure the name for the leaves
-  // note, that we make assumption: the current leaves are labeled
-  // consecutivatively!!!! OTHERWISE, it will not work wery well perform a
-  // traversal
-  TraversRecord tr;
-  InitPostorderTranvers(tr);
-  while (true) {
-    if (tr.pCurNode->IsLeaf() == true) {
-      // setup leave id
-      int origId = tr.pCurNode->GetLeafId();
-      YW_ASSERT_INFO(origId < (int)mapLvids.size(), "Leaf id is out of range");
-      tr.pCurNode->SetLeafId(mapLvids[origId]);
-      // cout << "Changing leave id from " << origId << " to " <<
-      // mapLvids[origId] << endl;
-    }
-    if (NextPostorderTranvers(tr) == false) {
-      break;
-    }
-  }
-}
-
-void RBT ::SetRoot(RBTNode *pRootNew) {
-  // clear up if there is old root
-  if (this->pRoot != NULL) {
-    delete this->pRoot;
-    this->pRoot = NULL;
-  }
-  YW_ASSERT_INFO(pRootNew != NULL, "Can not be NULL");
-  this->pRoot = pRootNew;
-  mapTipPtrs.clear();
-  mapSplitsInTree.clear();
+    //cout << "Here..\n";
+    // first clear the old tree if any
+    if (pRoot != NULL)
+    {
+        pRoot->Clear();
+        delete pRoot;
+        pRoot = NULL;
+    }
+    //cout << "Here...\n";
+    // now do for every node
+    vector<RBTNode *> listRBTNodes;
+    for (int i = 0; i < numTNodes; ++i)
+    {
+        YW_ASSERT_INFO((listNodeLeftChild[i] >= 0 && listNodeRightChild[i] >= 0) || (listNodeLeftChild[i] < 0 && listNodeRightChild[i] < 0), "WRONG");
+        //cout << "Adding node-" << i << endl;
+        // if it is leaf
+        if (listNodeLeftChild[i] < 0)
+        {
+            //cout << "A leaf\n";
+            //
+            RBTNode *pn0 = new RBTNode(i);
+
+            // also set height to be 1.0
+            pn0->SetHeight(1.0);
+
+            //
+            listRBTNodes.push_back(pn0);
+        }
+        else
+        {
+            //cout << "Not a leaf\n";
+            // not leaves
+            int pnLeftInd = listNodeLeftChild[i];
+            YW_ASSERT_INFO(pnLeftInd < numTNodes, "Tree node indices wrong");
+            int pnRightInd = listNodeRightChild[i];
+            YW_ASSERT_INFO(pnRightInd < numTNodes, "Tree node indices wrong");
+            RBTNode *pn0 = listRBTNodes[pnLeftInd];
+            RBTNode *pn1 = listRBTNodes[pnRightInd];
+            RBTNode *prn;
+            if (pn0->GetMinLeaveId() < pn1->GetMinLeaveId())
+            {
+                prn = new RBTNode(pn0, pn1);
+            }
+            else
+            {
+                prn = new RBTNode(pn1, pn0);
+            }
+
+            // height is set by the LEFT node (this may cause problems when the right is NOT consistent;
+            // for now, we IGNORE this hazard). NOTE, the leaf is LOWEST, so we decrease
+            double ht = pn0->GetHeight() - listEdgeDist[pnLeftInd];
+            prn->SetHeight(ht);
+
+            //
+            listRBTNodes.push_back(prn);
+        }
+    }
+
+    // set root
+    int numNodesInList = listRBTNodes.size();
+    YW_ASSERT_INFO(numNodesInList == numTNodes, "Wrong in ReconstructByPlainDesc");
+    this->pRoot = listRBTNodes[numNodesInList - 1];
+
+    return true;
+}
+
+void RBT ::RetrievePlainDesc(int &numLvs, vector<int> &listNodeLabels, vector<int> &listParentNodePos,
+                             vector<double> &listEdgeDist)
+{
+    numLvs = this->numLeaves;
+    // init the return params
+    listNodeLabels.clear();
+    listParentNodePos.clear();
+    listEdgeDist.clear();
+    for (int i = 0; i < GetNodesNum(); ++i)
+    {
+        if (i < numLeaves)
+        {
+            listNodeLabels.push_back(i);
+        }
+        else
+        {
+            listNodeLabels.push_back(-1);
+        }
+        listParentNodePos.push_back(-1);
+        listEdgeDist.push_back(-1.0);
+    }
+
+    // form a list of tree nodes
+    // start iteration. Maintain TWO lists: one for leaves and one for internals
+    vector<RBTNode *> listLeafNodes;
+    // leaves are fixed
+    listLeafNodes.resize(this->numLeaves);
+
+    vector<RBTNode *> listInternalNodes;
+    // use a map to quickly find location: well, not very elegant, but a QUICK way to do something
+    map<RBTNode *, int> mapNodeToIndices;
+    TraversRecord tr;
+    InitPostorderTranvers(tr);
+    while (true)
+    {
+        RBTNode *pcnode = tr.pCurNode;
+        if (pcnode->IsLeaf() == true)
+        {
+            int lvid = pcnode->GetLeafId();
+            YW_ASSERT_INFO(lvid >= 0 && lvid < this->numLeaves, "Fail in lvid");
+            listLeafNodes[lvid] = pcnode;
+            // save this node in the COMBINED index
+            mapNodeToIndices.insert(map<RBTNode *, int>::value_type(pcnode, lvid));
+        }
+        else
+        {
+            listInternalNodes.push_back(pcnode);
+            // save this node
+            int ppos = listInternalNodes.size() - 1 + numLeaves;
+            mapNodeToIndices.insert(map<RBTNode *, int>::value_type(pcnode, ppos));
+
+            // also update input for the two children
+            RBTNode *plc = pcnode->GetLeftChild();
+            RBTNode *prc = pcnode->GetRightChild();
+            YW_ASSERT_INFO(mapNodeToIndices.find(plc) != mapNodeToIndices.end(), "WRONG");
+            YW_ASSERT_INFO(mapNodeToIndices.find(prc) != mapNodeToIndices.end(), "WRONG");
+            int plcind = mapNodeToIndices[plc];
+            int prcind = mapNodeToIndices[prc];
+            //cout << "set left child node " << plcind << " to " << ppos << endl;
+            //cout << "set right child node " << prcind << " to " << ppos << endl;
+            listParentNodePos[plcind] = ppos;
+            listParentNodePos[prcind] = ppos;
+
+            // set edge length too
+            double htPar = pcnode->GetHeight();
+            double plcHt = plc->GetHeight();
+            double prcHt = prc->GetHeight();
+            if (htPar < 0 || plcHt < 0 || prcHt < 0 || plcHt < htPar || prcHt < htPar)
+            {
+                // NOT VERY GOOD. TBD. 100707
+                // set some arbitrary number
+                listEdgeDist[plcind] = 0.0;
+                listEdgeDist[prcind] = 0.0;
+            }
+            else
+            {
+                // YW_ASSERT_INFO(htPar >= 0.0, "Height not set.");
+                YW_ASSERT_INFO(plcHt >= 0.0 && prcHt >= 0.0, "Height not set.");
+                YW_ASSERT_INFO(plcHt >= htPar && prcHt >= htPar, "Height not set.");
+                listEdgeDist[plcind] = plcHt - htPar;
+                listEdgeDist[prcind] = prcHt - htPar;
+            }
+        }
+
+        //
+        if (NextPostorderTranvers(tr) == false)
+        {
+            break;
+        }
+    }
+
+    // make sure everything is correct
+    for (int i = 0; i < GetNodesNum() - 1; ++i)
+    {
+        YW_ASSERT_INFO(listParentNodePos[i] >= 0 &&
+                           listEdgeDist[i] >= 0,
+                       "Some nodes are not correctly set.");
+    }
+}
+
+void RBT ::AugamentDupRows(const vector<REMOVED_ROWS_INFO> &rmLvsStage)
+{
+    //cout << "Before row augament, tree = ";
+    //Dump();
+
+    // ASSUMPTION: the tree is currently labeled from 0 - numLeaves-1
+    // restore the leaves removed during matrix preprocessing
+    // the list of items is taken out in a step-by-step procedure
+    // IMPORTANT: need to reverse, since we start from the removed items
+    for (int i = (int)rmLvsStage.size() - 1; i >= 0; --i)
+    {
+        // first reset the ids of the leaves
+        // this is how the new ids will be
+        int curLvNum = this->numLeaves;
+        //int numRemoved = rmLvsStage[i].rowsRemoved.size();
+        vector<int> vecRemRows;
+        PopulateVecBySet(vecRemRows, rmLvsStage[i].rowsRemoved);
+        vector<int> listOrigLeaveIds;
+        GetOrigPositionAfterRemoval(curLvNum, vecRemRows, listOrigLeaveIds);
+        // reconfig the leaves
+        //cout << "Now setting leaves during tree augamentation...\n";
+        SetLvids(listOrigLeaveIds);
+
+        // then insert all the deleted rows back in
+        // but first collect tips
+        CollectTips();
+        // now try to put back the removed rows
+        for (int j = 0; j < (int)rmLvsStage[i].pairsRmKeepRows.size(); ++j)
+        {
+            //
+            int rowNew = rmLvsStage[i].pairsRmKeepRows[j].first;
+            YW_ASSERT_INFO(GetTip(rowNew) == NULL, "Tip is already in"); // should not be already in
+            int existId = rmLvsStage[i].pairsRmKeepRows[j].second;
+            //cout << "existId = " << existId << ", rowNew = " << rowNew << endl;
+            RBTNode *pn = GetTip(existId);
+            YW_ASSERT_INFO(pn != NULL, "Src node not found");
+            pn->AddSiblingToLeaf(rowNew);
+            //cout << "After adding back " << rowNew << " the tree is: ";
+            //Dump();
+        }
+        // update the number of leaves
+        YW_ASSERT_INFO(rmLvsStage[i].pairsRmKeepRows.size() == rmLvsStage[i].rowsRemoved.size(), "Removed record mismatch.");
+        this->numLeaves += rmLvsStage[i].pairsRmKeepRows.size();
+    }
+    //cout << "After row augament, tree = ";
+    //Dump();
+}
+
+void RBT ::SetLvids(const vector<int> &mapLvids)
+{
+    // configure the name for the leaves
+    // note that we make an assumption: the current leaves are labeled consecutively!
+    // OTHERWISE, it will not work very well
+    // perform a traversal
+    TraversRecord tr;
+    InitPostorderTranvers(tr);
+    while (true)
+    {
+        if (tr.pCurNode->IsLeaf() == true)
+        {
+            // set up leaf id
+            int origId = tr.pCurNode->GetLeafId();
+            YW_ASSERT_INFO(origId < (int)mapLvids.size(), "Leaf id is out of range");
+            tr.pCurNode->SetLeafId(mapLvids[origId]);
+            //cout << "Changing leave id from " << origId << " to " << mapLvids[origId] << endl;
+        }
+        if (NextPostorderTranvers(tr) == false)
+        {
+            break;
+        }
+    }
+}
+
+void RBT ::SetRoot(RBTNode *pRootNew)
+{
+    // clear up if there is old root
+    if (this->pRoot != NULL)
+    {
+        delete this->pRoot;
+        this->pRoot = NULL;
+    }
+    YW_ASSERT_INFO(pRootNew != NULL, "Can not be NULL");
+    this->pRoot = pRootNew;
+    mapTipPtrs.clear();
+    mapSplitsInTree.clear();
 }
diff --git a/trisicell/external/scistree/RBT.h b/trisicell/external/scistree/RBT.h
index 122ed0b..562cf1a 100644
--- a/trisicell/external/scistree/RBT.h
+++ b/trisicell/external/scistree/RBT.h
@@ -2,14 +2,14 @@
 #define RBT_H
 
 //
-#include <fstream>
 #include <iostream>
+#include <fstream>
 #include <vector>
 using namespace std;
 
-#include "BinaryMatrix.h"
 #include "Utils.h"
 #include "Utils2.h"
+#include "BinaryMatrix.h"
 
 // define a leaf-labeled rooted binary tree
 // note that we do not store the leaf label explicitly
@@ -21,83 +21,83 @@ using namespace std;
 // than minimum right leaves
 
 // a class for tree node
-class RBTNode {
+class RBTNode
+{
 public:
-  // create a leaf node
-  RBTNode(int lvid)
-      : pLeft(NULL), pRight(NULL), pParent(NULL), lvid(lvid), height(0.0) {}
-  // create an internal node with two
-  RBTNode(RBTNode *pLeft, RBTNode *pRight);
-  ~RBTNode() { Clear(); }
-
-  // operation
-  void SetHeight(double ht) { height = ht; }
-  double GetHeight() const { return height; }
-  RBTNode *CopySubTree();
-  void AddToLeftEdge(int lvid);
-  void AddToRightEdge(int lvid);
-  RBTNode *AddSibling(int lvid);
-  void AddSiblingToLeaf(int lvid);
-  RBTNode *FindLeaf(int lvid,
-                    int &ponid); // IMPORTANT, in traversal,
-                                 // assume post-order search, and return the how
-                                 // many nodes visited so far
-  bool RemoveLeafSelf();         // only remove self if it is a leaf
-  void DetachSubtree();          // detach this node from the rest of the tree
-  RBTNode *AttachSubtree(RBTNode *pSib);
-
-  // access
-  RBTNode *GetLeftChild() const { return pLeft; }
-  RBTNode *GetRightChild() const { return pRight; }
-  RBTNode *GetParent() { return pParent; }
-  RBTNode *GetSibling();
-  void SetLeftChild(RBTNode *pLeft) { this->pLeft = pLeft; }
-  void SetRightChild(RBTNode *pRight) { this->pRight = pRight; }
-  void SetParent(RBTNode *pParent) { this->pParent = pParent; }
-  int GetLeafId() { return lvid; }
-  void SetLeafId(int idNew) { this->lvid = idNew; }
-  RBTNode *GetLeftMostChild();
-  int GetMinLeaveId();
-  void GetLeaves(set<int> &lvs);
-  bool IsLeaf() const;
-  int GetNumLeavesUnder();
-  bool IsLeftChild();
-  bool IsRoot() { return this->pParent == NULL; }
-  void Dump() const;
-  string GetNewick() const;
-  void OutputNodeGML(ofstream &ofs);
-  void OutputEdgeGML(ofstream &ofs);
-
-  // memory. free recursively
-  void Clear();
+    // create a leaf node
+    RBTNode(int lvid) : pLeft(NULL), pRight(NULL), pParent(NULL), lvid(lvid), height(0.0) {}
+    // create an internal node with two children
+    RBTNode(RBTNode *pLeft, RBTNode *pRight);
+    ~RBTNode() { Clear(); }
+
+    // operation
+    void SetHeight(double ht) { height = ht; }
+    double GetHeight() const { return height; }
+    RBTNode *CopySubTree();
+    void AddToLeftEdge(int lvid);
+    void AddToRightEdge(int lvid);
+    RBTNode *AddSibling(int lvid);
+    void AddSiblingToLeaf(int lvid);
+    RBTNode *FindLeaf(int lvid, int &ponid); // IMPORTANT, in traversal,
+                                             // assume post-order search, and return how many nodes have been visited so far
+    bool RemoveLeafSelf();                   // only remove self if it is a leaf
+    void DetachSubtree();                    // detach this node from the rest of the tree
+    RBTNode *AttachSubtree(RBTNode *pSib);
+
+    // access
+    RBTNode *GetLeftChild() const { return pLeft; }
+    RBTNode *GetRightChild() const { return pRight; }
+    RBTNode *GetParent() { return pParent; }
+    RBTNode *GetSibling();
+    void SetLeftChild(RBTNode *pLeft) { this->pLeft = pLeft; }
+    void SetRightChild(RBTNode *pRight) { this->pRight = pRight; }
+    void SetParent(RBTNode *pParent) { this->pParent = pParent; }
+    int GetLeafId() { return lvid; }
+    void SetLeafId(int idNew) { this->lvid = idNew; }
+    RBTNode *GetLeftMostChild();
+    int GetMinLeaveId();
+    void GetLeaves(set<int> &lvs);
+    bool IsLeaf() const;
+    int GetNumLeavesUnder();
+    bool IsLeftChild();
+    bool IsRoot() { return this->pParent == NULL; }
+    void Dump() const;
+    string GetNewick() const;
+    void OutputNodeGML(ofstream &ofs);
+    void OutputEdgeGML(ofstream &ofs);
+
+    // memory. free recursively
+    void Clear();
 
 private:
-  void AdjustLRChildUpwards();
+    void AdjustLRChildUpwards();
 
-  // two children
-  RBTNode *pLeft;
-  RBTNode *pRight;
-  RBTNode *pParent;
-  int lvid;
-  double height; // useful in some situations, normalized to between 0-1
+    // two children
+    RBTNode *pLeft;
+    RBTNode *pRight;
+    RBTNode *pParent;
+    int lvid;
+    double height; // useful in some situations, normalized to between 0-1
 
-  // utility
-  static int idNodeNextToUse;
+    // utility
+    static int idNodeNextToUse;
 };
 
 // define triplets
 // Triplets are important for rooted tree, since the set of triplets
 // uniquely define a RBT
-typedef struct {
-  // note by convention, a < b. But c is on the other side of partition (a,b), c
-  int a;
-  int b;
-  int c;
+typedef struct
+{
+    // note by convention, a < b. But c is on the other side of partition (a,b), c
+    int a;
+    int b;
+    int c;
 } TripleLeaves;
 
 // define for traversal
-typedef struct {
-  RBTNode *pCurNode;
+typedef struct
+{
+    RBTNode *pCurNode;
 } TraversRecord;
 
 // sometimes, we want to an ID for the tree
@@ -107,110 +107,101 @@ typedef struct {
 typedef int RBT_ID;
 
 // main class
-class RBT {
+class RBT
+{
 public:
-  // different ways of initializing a tree
-  // it can be by a supplied id
-  RBT(int numLeaves, RBT_ID tid);
-  RBT(const RBT &rhs);
-  // interop with simple representation
-  RBT(int numLeaves, const vector<int> &listNodeLabels,
-      const vector<int> &listParentNodePos, const vector<double> &listEdgeDist);
-  RBT &operator=(const RBT &rhs);
-  // bool operator == (const RBT &rhs) { return IsSame(rhs); }
-  ~RBT();
-
-  // ID functions
-  RBT_ID GetId();
-  RBT_ID MapToId();
-  bool IsSame(const RBT &tr) const;
-
-  // splits functions
-  bool IsSplitContained(const set<int> &split); // test whether a split is in
-                                                // the tree
-  void GetAllSplits(vector<set<int> > &listSplits);
-
-  // SPR function
-  void FindSPRDistOneNgbrs(set<int> &ngbrIds);
-  void FindSPRDistOneNgbrs(vector<RBT *> &ngbrTrees);
-  void FindSPRDistOneNgbrsRestricted(vector<RBT *> &ngbrTrees,
-                                     const vector<RBT *> &ConstraintTrees);
-  bool IsOneSPRAway(const RBT &rbt) const; // testing whether it is one or two
-                                           // SPR away
-  bool IsTwoSPRAway(const RBT &rbt) const;
-  static void Consolidate(RBT &treeOpt, RBT &treeCmp);
-
-  // editing
-  bool RemoveLeaf(int lvid);
-  void ReconstructById(RBT_ID tid);
-  bool ReconstructNewick(const string &strNewick);
-  void PruneLargeIdNodes(int idThres);
-  void DeleteLeaves(set<int> &lvids); // delete leaves designated
-  void RealignLeaves(); // sometimes, say after leave is deleted, leaves are no
-                        // longer contiguous, this op sets it back to contiguous
-  void AugamentDupRows(
-      const vector<REMOVED_ROWS_INFO> &rmLvsStage); // restore the leaves
-                                                    // removed during matrix
-                                                    // preprocessing
-  void SetRoot(RBTNode *pRootNew);
-  RBTNode *GetRoot() { return pRoot; }
-
-  // dynamic functions: allow adding new nodes
-  bool AddLeaf(int pos);
-
-  // access
-  void GetLeaves(set<int> &lvs);
-  void Dump() const;
-  void OutputGML(const char *fileName);
-
-  // Int-op with another format
-  void RetrievePlainDesc(int &numLeaves, vector<int> &listNodeLabels,
-                         vector<int> &listParentNodePos,
-                         vector<double> &listEdgeDist);
-  int GetNodesNum() { return 2 * numLeaves - 1; }
-  string GetNewick() const;
-  int GetLeafNum() { return numLeaves; }
-  bool IsEmpty() const { return pRoot == NULL && numLeaves == 0; }
-
-  // compare
-  int Compare(RBT &rhs);
-  bool IsSameUnrootedTree(RBT &rhs);
-  void CollectTips();
-  RBTNode *GetTip(int id);
-  void GetAllTips(vector<RBTNode *> &tips);
+    // different ways of initializing a tree
+    // it can be by a supplied id
+    RBT(int numLeaves, RBT_ID tid);
+    RBT(const RBT &rhs);
+    // interop with simple representation
+    RBT(int numLeaves, const vector<int> &listNodeLabels, const vector<int> &listParentNodePos,
+        const vector<double> &listEdgeDist);
+    RBT &operator=(const RBT &rhs);
+    //bool operator == (const RBT &rhs) { return IsSame(rhs); }
+    ~RBT();
+
+    // ID functions
+    RBT_ID GetId();
+    RBT_ID MapToId();
+    bool IsSame(const RBT &tr) const;
+
+    // splits functions
+    bool IsSplitContained(const set<int> &split); // test whether a split is in the tree
+    void GetAllSplits(vector<set<int>> &listSplits);
+
+    // SPR function
+    void FindSPRDistOneNgbrs(set<int> &ngbrIds);
+    void FindSPRDistOneNgbrs(vector<RBT *> &ngbrTrees);
+    void FindSPRDistOneNgbrsRestricted(vector<RBT *> &ngbrTrees, const vector<RBT *> &ConstraintTrees);
+    bool IsOneSPRAway(const RBT &rbt) const; // testing whether it is one or two SPR away
+    bool IsTwoSPRAway(const RBT &rbt) const;
+    static void Consolidate(RBT &treeOpt, RBT &treeCmp);
+
+    // editing
+    bool RemoveLeaf(int lvid);
+    void ReconstructById(RBT_ID tid);
+    bool ReconstructNewick(const string &strNewick);
+    void PruneLargeIdNodes(int idThres);
+    void DeleteLeaves(set<int> &lvids);                                // delete leaves designated
+    void RealignLeaves();                                              // sometimes, say after leave is deleted, leaves are no longer contiguous, this op sets it back to contiguous
+    void AugamentDupRows(const vector<REMOVED_ROWS_INFO> &rmLvsStage); // restore the leaves removed during matrix preprocessing
+    void SetRoot(RBTNode *pRootNew);
+    RBTNode *GetRoot() { return pRoot; }
+
+    // dynamic functions: allow adding new nodes
+    bool AddLeaf(int pos);
+
+    // access
+    void GetLeaves(set<int> &lvs);
+    void Dump() const;
+    void OutputGML(const char *fileName);
+
+    // Interop with another format
+    void RetrievePlainDesc(int &numLeaves, vector<int> &listNodeLabels, vector<int> &listParentNodePos,
+                           vector<double> &listEdgeDist);
+    int GetNodesNum() { return 2 * numLeaves - 1; }
+    string GetNewick() const;
+    int GetLeafNum() { return numLeaves; }
+    bool IsEmpty() const { return pRoot == NULL && numLeaves == 0; }
+
+    // compare
+    int Compare(RBT &rhs);
+    bool IsSameUnrootedTree(RBT &rhs);
+    void CollectTips();
+    RBTNode *GetTip(int id);
+    void GetAllTips(vector<RBTNode *> &tips);
 
 private:
-  RBT() {}     // do not allow default construction
-  void Init(); // common initialization
-  // void ConsTripleMap();   // save all the triples
-  // support traversal
-  bool InitPostorderTranvers(TraversRecord &tr);
-  bool NextPostorderTranvers(TraversRecord &tr);
-  void RetrieveSplits();
-  RBTNode *FindLeaf(int lvid, int &ponid);
-  RBTNode *ReconstructNewickInternal(const string &strNewick);
-  bool InternalAddleaf(int lvid, int pos);
-  bool ReconstructByPlainDesc(const vector<int> &listNodeLabels,
-                              const vector<int> &listParentNodePos,
-                              const vector<double> &listEdgeDist);
-  void SetLvids(const vector<int> &mapLvids); // configure the name for the
-                                              // leaves
-
-  // save a dynamic root node
-  RBTNode *pRoot;
-
-  // we also save the splits
-  map<set<int>, bool> mapSplitsInTree;
-  map<int, RBTNode *> mapTipPtrs;
-
-  // note we do not normally allow morhping the tree
-  // EXCEPT during initialtion. Since convert to id
-  // can be slow, we cache it
-  int numLeaves;
-  RBT_ID tid;
-
-  // collect of triples
-  // map< TripleLeaves, bool > mapTriples;
+    RBT() {}     // do not allow default construction
+    void Init(); // common initialization
+    //void ConsTripleMap();   // save all the triples
+    // support traversal
+    bool InitPostorderTranvers(TraversRecord &tr);
+    bool NextPostorderTranvers(TraversRecord &tr);
+    void RetrieveSplits();
+    RBTNode *FindLeaf(int lvid, int &ponid);
+    RBTNode *ReconstructNewickInternal(const string &strNewick);
+    bool InternalAddleaf(int lvid, int pos);
+    bool ReconstructByPlainDesc(const vector<int> &listNodeLabels, const vector<int> &listParentNodePos,
+                                const vector<double> &listEdgeDist);
+    void SetLvids(const vector<int> &mapLvids); // configure the name for the leaves
+
+    // save a dynamic root node
+    RBTNode *pRoot;
+
+    // we also save the splits
+    map<set<int>, bool> mapSplitsInTree;
+    map<int, RBTNode *> mapTipPtrs;
+
+    // note we do not normally allow morphing the tree
+    // EXCEPT during initialization. Since converting to an id
+    // can be slow, we cache it
+    int numLeaves;
+    RBT_ID tid;
+
+    // collection of triples
+    //map< TripleLeaves, bool > mapTriples;
 };
 
 ///////////////////////////////////////////////////////////////////////////////////////
diff --git a/trisicell/external/scistree/RerootTreeUtils.cpp b/trisicell/external/scistree/RerootTreeUtils.cpp
index f10501e..7f9928e 100644
--- a/trisicell/external/scistree/RerootTreeUtils.cpp
+++ b/trisicell/external/scistree/RerootTreeUtils.cpp
@@ -1,179 +1,207 @@
-#include "RerootTreeUtils.h"
+#include <iostream>
+#include <fstream>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <deque>
-#include <fstream>
-#include <iostream>
+#include <vector>
+#include <string>
 #include <map>
-#include <queue>
 #include <set>
+#include <queue>
+#include <deque>
 #include <stack>
-#include <string>
-#include <vector>
+#include "RerootTreeUtils.h"
 
 using namespace std;
 
-void split(string &content, vector<string> &elements) {
-  elements.clear();
-  string tmp;
-  for (int i = 0; i < (int)content.size(); ++i) {
-    if (content[i] == ',' || content[i] == ':' || content[i] == '(' ||
-        content[i] == ')') {
-      if (!tmp.empty())
-        elements.push_back(tmp);
-      char ch[2] = { content[i], 0 };
-      elements.push_back(string(ch));
-      tmp.clear();
-      continue;
-    } else if ((content[i] >= '0' && content[i] <= '9') ||
-               (content[i] >= 'A' && content[i] <= 'Z') ||
-               (content[i] >= 'a' && content[i] <= 'z') || content[i] == '.') {
-      const char ch[2] = { content[i], 0 };
-      tmp.append(ch);
+void split(string &content, vector<string> &elements)
+{
+    elements.clear();
+    string tmp;
+    for (int i = 0; i < (int)content.size(); ++i)
+    {
+        if (content[i] == ',' || content[i] == ':' || content[i] == '(' || content[i] == ')')
+        {
+            if (!tmp.empty())
+                elements.push_back(tmp);
+            char ch[2] = {content[i], 0};
+            elements.push_back(string(ch));
+            tmp.clear();
+            continue;
+        }
+        else if ((content[i] >= '0' && content[i] <= '9') ||
+                 (content[i] >= 'A' && content[i] <= 'Z') ||
+                 (content[i] >= 'a' && content[i] <= 'z') || content[i] == '.')
+        {
+            const char ch[2] = {content[i], 0};
+            tmp.append(ch);
+        }
     }
-  }
 }
-struct Edge {
-  int a;
-  double weight;
-  Edge(int a, double weight) {
-    this->a = a;
-    this->weight = weight;
-  }
-  bool operator<(const Edge &edge) const {
-    if (a != edge.a)
-      return a < edge.a;
-    return weight < edge.weight;
-  }
+struct Edge
+{
+    int a;
+    double weight;
+    Edge(int a, double weight)
+    {
+        this->a = a;
+        this->weight = weight;
+    }
+    bool operator<(const Edge &edge) const
+    {
+        if (a != edge.a)
+            return a < edge.a;
+        return weight < edge.weight;
+    }
 };
 
-double stringToDouble(string &content) {
-  double ret = 0;
-  int i = 0;
-  for (; i < content.size() && content[i] != '.'; ++i) {
-    if (content[i] < '0' || content[i] > '9') {
-      printf("input tree string is not right\n");
-      exit(0);
+double stringToDouble(string &content)
+{
+    double ret = 0;
+    int i = 0;
+    for (; i < content.size() && content[i] != '.'; ++i)
+    {
+        if (content[i] < '0' || content[i] > '9')
+        {
+            printf("input tree string is not right\n");
+            exit(0);
+        }
+        ret = ret * 10 + content[i] - '0';
     }
-    ret = ret * 10 + content[i] - '0';
-  }
-  double x = 0;
-  if (content[i] == '.') {
-    for (int j = content.size() - 1; j > i; --j) {
-      if (content[j] < '0' || content[j] > '9') {
-        printf("input tree string is not right\n");
-        exit(0);
-      }
-      x = x * 0.1 + content[j] - '0';
+    double x = 0;
+    if (content[i] == '.')
+    {
+        for (int j = content.size() - 1; j > i; --j)
+        {
+            if (content[j] < '0' || content[j] > '9')
+            {
+                printf("input tree string is not right\n");
+                exit(0);
+            }
+            x = x * 0.1 + content[j] - '0';
+        }
     }
-  }
-  x = x * 0.1;
-  return ret + x;
+    x = x * 0.1;
+    return ret + x;
 }
 
-int stringToInt(string &content) {
-  int ret = 0;
-  for (int i = 0; i < content.size(); ++i)
-    ret = ret * 10 + content[i] - '0';
-  return ret;
-}
-void buildGraph(vector<string> &elements, map<int, map<int, double> > &graph,
-                map<string, int> &leaf_to_label) {
-  graph.clear();
-  stack<char> s1;
-  stack<Edge> s2;
-  int a = -1;
-  int cc = 0;
-  for (int i = 0; i < elements.size(); ++i) {
-    if (elements[i].compare("(") == 0) {
-      s1.push('(');
-    } else if (elements[i].compare(",") == 0) {
-      s1.push(',');
-    } else if (elements[i].compare(":") == 0) {
-      s1.push(':');
-    } else if (elements[i].compare(")") == 0) {
-      if (s1.empty() || s1.top() != ',') {
-        printf("input tree string is not right\n");
-        exit(0);
-      }
-      s1.pop();
-      if (s1.empty() || s1.top() != '(') {
-        printf("input tree string is not right\n");
-        exit(0);
-      }
-      s1.pop();
-      a = cc;
-      if ((int)s2.size() - 2 < 0) {
+void buildGraph(vector<string> &elements, map<int, map<int, double>> &graph, map<string, int> &leaf_to_label)
+{
+    graph.clear();
+    stack<char> s1;
+    stack<Edge> s2;
+    int a = -1;
+    int cc = 0;
+    for (int i = 0; i < elements.size(); ++i)
+    {
+        if (elements[i].compare("(") == 0)
+        {
+            s1.push('(');
+        }
+        else if (elements[i].compare(",") == 0)
+        {
+            s1.push(',');
+        }
+        else if (elements[i].compare(":") == 0)
+        {
+            s1.push(':');
+        }
+        else if (elements[i].compare(")") == 0)
+        {
+            if (s1.empty() || s1.top() != ',')
+            {
+                printf("input tree string is not right\n");
+                exit(0);
+            }
+            s1.pop();
+            if (s1.empty() || s1.top() != '(')
+            {
+                printf("input tree string is not right\n");
+                exit(0);
+            }
+            s1.pop();
+            a = cc;
+            if ((int)s2.size() - 2 < 0)
+            {
+                printf("input tree string is not right\n");
+                exit(0);
+            }
+            graph[a][s2.top().a] = s2.top().weight;
+            graph[s2.top().a][a] = s2.top().weight;
+            s2.pop();
+            graph[a][s2.top().a] = s2.top().weight;
+            graph[s2.top().a][a] = s2.top().weight;
+            s2.pop();
+            cc++;
+        }
+        else
+        {
+            if (s1.top() != ':')
+            {
+                a = cc;
+                leaf_to_label[elements[i]] = cc;
+                cc++;
+            }
+            else
+            {
+                double xx = stringToDouble(elements[i]);
+                if (a == -1)
+                {
+                    printf("input tree string is not right\n");
+                    exit(0);
+                }
+                s1.pop();
+                s2.push(Edge(a, xx));
+                a = -1;
+            }
+        }
+    }
+    if (!s1.empty() || !s2.empty())
+    {
         printf("input tree string is not right\n");
         exit(0);
-      }
-      graph[a][s2.top().a] = s2.top().weight;
-      graph[s2.top().a][a] = s2.top().weight;
-      s2.pop();
-      graph[a][s2.top().a] = s2.top().weight;
-      graph[s2.top().a][a] = s2.top().weight;
-      s2.pop();
-      cc++;
-    } else {
-      if (s1.top() != ':') {
-        a = cc;
-        leaf_to_label[elements[i]] = cc;
-        cc++;
-      } else {
-        double xx = stringToDouble(elements[i]);
-        if (a == -1) {
-          printf("input tree string is not right\n");
-          exit(0);
-        }
-        s1.pop();
-        s2.push(Edge(a, xx));
-        a = -1;
-      }
     }
-  }
-  if (!s1.empty() || !s2.empty()) {
-    printf("input tree string is not right\n");
-    exit(0);
-  }
 }
 
-string convert(char *content, char *new_root) {
-  string strRes;
-  if (content == NULL || new_root == NULL)
-    return strRes;
-  string tree_str(content);
-  vector<string> elements;
-  split(tree_str, elements);
-  map<int, map<int, double> > graph;
-  map<string, int> leaf_to_label;
-  buildGraph(elements, graph, leaf_to_label);
+string convert(char *content, char *new_root)
+{
+    string strRes;
+    if (content == NULL || new_root == NULL)
+        return strRes;
+    string tree_str(content);
+    vector<string> elements;
+    split(tree_str, elements);
+    map<int, map<int, double>> graph;
+    map<string, int> leaf_to_label;
+    buildGraph(elements, graph, leaf_to_label);
 
-  string new_root_str(new_root);
-  if (leaf_to_label.find(new_root_str) == leaf_to_label.end()) {
-    printf("No such root %s\n", new_root);
-    exit(0);
-  }
-  int nr = graph.size();
-  int xx = -1;
-  double yy = 0;
+    string new_root_str(new_root);
+    if (leaf_to_label.find(new_root_str) == leaf_to_label.end())
+    {
+        printf("No such root %s\n", new_root);
+        exit(0);
+    }
+    int nr = graph.size();
+    int xx = -1;
+    double yy = 0;
 
-  // modify graph, add new root
-  int nl = leaf_to_label[new_root_str];
-  for (map<int, map<int, double> >::iterator iter = graph.begin();
-       iter != graph.end(); ++iter) {
-    if (iter->second.find(nl) != iter->second.end()) {
-      yy = iter->second[nl];
-      xx = iter->first;
-      iter->second[nr] = yy / 2;
-      iter->second.erase(nl);
-      break;
+    //modify graph, add new root
+    int nl = leaf_to_label[new_root_str];
+    for (map<int, map<int, double>>::iterator iter = graph.begin(); iter != graph.end(); ++iter)
+    {
+        if (iter->second.find(nl) != iter->second.end())
+        {
+            yy = iter->second[nl];
+            xx = iter->first;
+            iter->second[nr] = yy / 2;
+            iter->second.erase(nl);
+            break;
+        }
     }
-  }
-  graph[nr][xx] = yy / 2;
-  graph[nr][nl] = yy / 2;
-  graph[nl].clear();
-  graph[nl][nr] = yy / 2;
+    graph[nr][xx] = yy / 2;
+    graph[nr][nl] = yy / 2;
+    graph[nl].clear();
+    graph[nl][nr] = yy / 2;
 #if 0
 printf("graph\n");
 for (map<int, map<int, double> >::iterator iter1 =graph.begin();iter1!=graph.end();++iter1) {
@@ -184,37 +212,39 @@ printf("(%d,%lf) ", iter2->first, iter2->second);
 printf("\n");
 }
 #endif
-  // bfs, get new weight
-  int n = graph.size();
-  vector<double> wei;
-  vector<bool> flag;
-  wei.reserve(n);
-  flag.reserve(n);
-  for (int i = 0; i < n; i++) {
-    flag.push_back(false);
-    wei.push_back(0);
-  }
-  queue<int> qu;
-  qu.push(nr);
-  flag[nr] = true;
-  map<int, set<int> > tree;
-  map<int, int> parent;
-  while (!qu.empty()) {
-    int t = qu.front();
-    qu.pop();
-    if (graph.find(t) == graph.end())
-      continue;
-    for (map<int, double>::iterator iter = graph[t].begin();
-         iter != graph[t].end(); ++iter) {
-      if (flag[iter->first])
-        continue;
-      flag[iter->first] = true;
-      qu.push(iter->first);
-      wei[iter->first] = wei[t] + (iter->second);
-      tree[t].insert(iter->first);
-      parent[iter->first] = t;
+    // bfs, get new weight
+    int n = graph.size();
+    vector<double> wei;
+    vector<bool> flag;
+    wei.reserve(n);
+    flag.reserve(n);
+    for (int i = 0; i < n; i++)
+    {
+        flag.push_back(false);
+        wei.push_back(0);
+    }
+    queue<int> qu;
+    qu.push(nr);
+    flag[nr] = true;
+    map<int, set<int>> tree;
+    map<int, int> parent;
+    while (!qu.empty())
+    {
+        int t = qu.front();
+        qu.pop();
+        if (graph.find(t) == graph.end())
+            continue;
+        for (map<int, double>::iterator iter = graph[t].begin(); iter != graph[t].end(); ++iter)
+        {
+            if (flag[iter->first])
+                continue;
+            flag[iter->first] = true;
+            qu.push(iter->first);
+            wei[iter->first] = wei[t] + (iter->second);
+            tree[t].insert(iter->first);
+            parent[iter->first] = t;
+        }
     }
-  }
 #if 0
 printf("tree\n");
 for (map<int, set<int> >::iterator iter1 =tree.begin();iter1!=tree.end();++iter1) {
@@ -235,20 +265,21 @@ for (int i=0;i<wei.size();++i) {
 printf("%d %lf\n", i, wei[i]);
 }
 #endif
-  // eliminate old root
-  int old = n - 2;
-  xx = *(tree[old].begin());
-  for (map<int, set<int> >::iterator iter = tree.begin(); iter != tree.end();
-       ++iter) {
-    if (iter->second.find(old) != iter->second.end()) {
-      iter->second.erase(old);
-      iter->second.insert(xx);
-      parent[xx] = iter->first;
-      break;
+    // eliminate old root
+    int old = n - 2;
+    xx = *(tree[old].begin());
+    for (map<int, set<int>>::iterator iter = tree.begin(); iter != tree.end(); ++iter)
+    {
+        if (iter->second.find(old) != iter->second.end())
+        {
+            iter->second.erase(old);
+            iter->second.insert(xx);
+            parent[xx] = iter->first;
+            break;
+        }
     }
-  }
-  tree.erase(old);
-  parent.erase(old);
+    tree.erase(old);
+    parent.erase(old);
 
 #if 0
 printf("tree\n");
@@ -270,120 +301,128 @@ for (int i=0;i<wei.size();++i) {
 printf("%d %lf\n", i, wei[i]);
 }
 #endif
-  // print new tree
-  map<int, string> nts;
+    // print new tree
+    map<int, string> nts;
 
-  deque<pair<int, int> > de;
-  for (map<int, set<int> >::iterator iter = tree.begin(); iter != tree.end();
-       ++iter) {
-    if (iter->second.size() == 2) {
-      int a[3] = { 0, 0, 0 };
-      for (set<int>::iterator iter2 = iter->second.begin();
-           iter2 != iter->second.end(); ++iter2) {
-        if (tree.find(*iter2) == tree.end())
-          a[++a[0]] = *iter2;
-      }
-      if (a[0] == 2) {
-        de.push_back(pair<int, int>(a[1], a[2]));
-      }
+    deque<pair<int, int>> de;
+    for (map<int, set<int>>::iterator iter = tree.begin(); iter != tree.end(); ++iter)
+    {
+        if (iter->second.size() == 2)
+        {
+            int a[3] = {0, 0, 0};
+            for (set<int>::iterator iter2 = iter->second.begin(); iter2 != iter->second.end(); ++iter2)
+            {
+                if (tree.find(*iter2) == tree.end())
+                    a[++a[0]] = *iter2;
+            }
+            if (a[0] == 2)
+            {
+                de.push_back(pair<int, int>(a[1], a[2]));
+            }
+        }
     }
-  }
 
-  for (map<string, int>::iterator iter = leaf_to_label.begin();
-       iter != leaf_to_label.end(); ++iter) {
-    char tmp[100];
-    double tt = 0;
-    if (parent.find(iter->second) != parent.end())
-      tt = wei[parent[iter->second]];
-    sprintf(tmp, "%f", wei[iter->second] - tt);
-    if (iter->second != nr)
-      nts[iter->second] = iter->first + ':' + string(tmp);
-    else
-      nts[iter->second] = iter->first;
-  }
+    for (map<string, int>::iterator iter = leaf_to_label.begin(); iter != leaf_to_label.end(); ++iter)
+    {
+        char tmp[100];
+        double tt = 0;
+        if (parent.find(iter->second) != parent.end())
+            tt = wei[parent[iter->second]];
+        sprintf(tmp, "%f", wei[iter->second] - tt);
+        if (iter->second != nr)
+            nts[iter->second] = iter->first + ':' + string(tmp);
+        else
+            nts[iter->second] = iter->first;
+    }
 #if 0
 printf("node to string\n");
 for (map<int, string>::iterator iter =nts.begin();iter!=nts.end();++iter) {
 printf("%d: %s\n", iter->first, iter->second.c_str());
 }
 #endif
-  while (!de.empty()) {
-    pair<int, int> a = de.front();
-    de.pop_front();
-    int pa = parent[a.first];
-    char tmp[100];
-    double tt = 0;
-    if (parent.find(pa) != parent.end())
-      tt = wei[parent[pa]];
-    sprintf(tmp, "%f", wei[pa] - tt);
-    if (pa != nr)
-      nts[pa] =
-          '(' + nts[a.first] + ',' + nts[a.second] + ')' + ':' + string(tmp);
-    else
-      nts[pa] = '(' + nts[a.first] + ',' + nts[a.second] + ')';
-    tree.erase(pa);
-    if (parent.find(pa) != parent.end()) {
-      int ppa = parent[pa];
-      int sibling;
-      for (set<int>::iterator iter = tree[ppa].begin(); iter != tree[ppa].end();
-           ++iter) {
-        if ((*iter) != pa)
-          sibling = *iter;
-      }
-      if (tree.find(sibling) == tree.end()) {
-        de.push_back(pair<int, int>(pa, sibling));
-      }
+    while (!de.empty())
+    {
+        pair<int, int> a = de.front();
+        de.pop_front();
+        int pa = parent[a.first];
+        char tmp[100];
+        double tt = 0;
+        if (parent.find(pa) != parent.end())
+            tt = wei[parent[pa]];
+        sprintf(tmp, "%f", wei[pa] - tt);
+        if (pa != nr)
+            nts[pa] = '(' + nts[a.first] + ',' + nts[a.second] + ')' + ':' + string(tmp);
+        else
+            nts[pa] = '(' + nts[a.first] + ',' + nts[a.second] + ')';
+        tree.erase(pa);
+        if (parent.find(pa) != parent.end())
+        {
+            int ppa = parent[pa];
+            int sibling;
+            for (set<int>::iterator iter = tree[ppa].begin(); iter != tree[ppa].end(); ++iter)
+            {
+                if ((*iter) != pa)
+                    sibling = *iter;
+            }
+            if (tree.find(sibling) == tree.end())
+            {
+                de.push_back(pair<int, int>(pa, sibling));
+            }
+        }
     }
-  }
-  // printf("%s\n", nts[nr].c_str());
-  strRes = nts[nr];
-  return strRes;
+    //printf("%s\n", nts[nr].c_str());
+    strRes = nts[nr];
+    return strRes;
 }
 
-void Test_split() {
-  string a("(((1:1.0,2:2.0):1.2,(3:1.0,4:2.0):1.6):1.5,5:1.0)");
-  string b("( ( ( 1 : 1.0 , 2 : 2.0 ) : 1.2 , ( 3 : 1.0 , 4 : 2.0 ) : 1.6 ) : "
-           "1.5 , 5 : 1.0 ) ");
-  vector<string> elements;
-  split(a, elements);
-  for (int i = 0; i < elements.size(); ++i) {
-    printf("%s ", elements[i].c_str());
-  }
-  printf("\n");
-  split(b, elements);
-  for (int i = 0; i < elements.size(); ++i) {
-    printf("%s ", elements[i].c_str());
-  }
-  printf("\n");
+void Test_split()
+{
+    string a("(((1:1.0,2:2.0):1.2,(3:1.0,4:2.0):1.6):1.5,5:1.0)");
+    string b("( ( ( 1 : 1.0 , 2 : 2.0 ) : 1.2 , ( 3 : 1.0 , 4 : 2.0 ) : 1.6 ) : 1.5 , 5 : 1.0 ) ");
+    vector<string> elements;
+    split(a, elements);
+    for (int i = 0; i < elements.size(); ++i)
+    {
+        printf("%s ", elements[i].c_str());
+    }
+    printf("\n");
+    split(b, elements);
+    for (int i = 0; i < elements.size(); ++i)
+    {
+        printf("%s ", elements[i].c_str());
+    }
+    printf("\n");
 }
 
-void Test_buildGraph() {
-  string a("(((1:1.0,2:2.0):1.2,(3:1.0,4:2.0):1.6):1.5,5:1.0)");
-  vector<string> elements;
-  split(a, elements);
-  map<int, map<int, double> > graph;
-  map<string, int> leaf_to_label;
-  buildGraph(elements, graph, leaf_to_label);
-  printf("leaf to label\n");
-  for (map<string, int>::iterator iter = leaf_to_label.begin();
-       iter != leaf_to_label.end(); ++iter) {
-    printf("%s:%d\n", iter->first.c_str(), iter->second);
-  }
-  printf("Graph\n");
-  for (map<int, map<int, double> >::iterator iter1 = graph.begin();
-       iter1 != graph.end(); ++iter1) {
-    printf("%d:", iter1->first);
-    for (map<int, double>::iterator iter2 = iter1->second.begin();
-         iter2 != iter1->second.end(); ++iter2) {
-      printf("(%d,%lf) ", iter2->first, iter2->second);
+void Test_buildGraph()
+{
+    string a("(((1:1.0,2:2.0):1.2,(3:1.0,4:2.0):1.6):1.5,5:1.0)");
+    vector<string> elements;
+    split(a, elements);
+    map<int, map<int, double>> graph;
+    map<string, int> leaf_to_label;
+    buildGraph(elements, graph, leaf_to_label);
+    printf("leaf to label\n");
+    for (map<string, int>::iterator iter = leaf_to_label.begin(); iter != leaf_to_label.end(); ++iter)
+    {
+        printf("%s:%d\n", iter->first.c_str(), iter->second);
+    }
+    printf("Graph\n");
+    for (map<int, map<int, double>>::iterator iter1 = graph.begin(); iter1 != graph.end(); ++iter1)
+    {
+        printf("%d:", iter1->first);
+        for (map<int, double>::iterator iter2 = iter1->second.begin(); iter2 != iter1->second.end(); ++iter2)
+        {
+            printf("(%d,%lf) ", iter2->first, iter2->second);
+        }
+        printf("\n");
     }
-    printf("\n");
-  }
 }
 
-string ReRootTreeNewick(char *nwFile, char *taxaNewRoot) {
-  // char * a="(((1:1.0,2:2.0):1.2,(3:1.0,4:2.0):1.6):1.5,5:1.0)";
-  // char *b ="3";
-  // usage for converting
-  return convert(nwFile, taxaNewRoot);
+string ReRootTreeNewick(char *nwFile, char *taxaNewRoot)
+{
+    //char * a="(((1:1.0,2:2.0):1.2,(3:1.0,4:2.0):1.6):1.5,5:1.0)";
+    //char *b ="3";
+    // usage for converting
+    return convert(nwFile, taxaNewRoot);
 }
diff --git a/trisicell/external/scistree/ScistDoublet.cpp b/trisicell/external/scistree/ScistDoublet.cpp
index 697c2b0..1010873 100644
--- a/trisicell/external/scistree/ScistDoublet.cpp
+++ b/trisicell/external/scistree/ScistDoublet.cpp
@@ -7,123 +7,110 @@
 //
 
 #include "ScistDoublet.hpp"
-#include "PhylogenyTree.h"
-#include "PhylogenyTreeBasic.h"
 #include "ScistGenotype.hpp"
 #include "ScistPerfPhyImp.hpp"
 #include "Utils3.h"
+#include "PhylogenyTreeBasic.h"
+#include "PhylogenyTree.h"
 #include <iomanip>
 
 // *************************************************************************************
 // DP backtrace info
 
-ScistDoubletDPTraceback ::ScistDoubletDPTraceback()
-    : indexChild1(-1), phaseChild1(-1), indexChild2(-1), phaseChild2(-1) {}
-
-ScistDoubletDPTraceback ::ScistDoubletDPTraceback(
-    const ScistDoubletDPTraceback &rhs)
-    : indexChild1(rhs.indexChild1), phaseChild1(rhs.phaseChild1),
-      indexChild2(rhs.indexChild2), phaseChild2(rhs.phaseChild2) {}
-
-ScistDoubletDPTraceback &
-ScistDoubletDPTraceback ::operator=(const ScistDoubletDPTraceback &rhs) {
-  indexChild1 = rhs.indexChild1;
-  phaseChild1 = rhs.phaseChild1;
-  indexChild2 = rhs.indexChild2;
-  phaseChild2 = rhs.phaseChild2;
-  return *this;
+ScistDoubletDPTraceback ::ScistDoubletDPTraceback() : indexChild1(-1), phaseChild1(-1), indexChild2(-1), phaseChild2(-1)
+{
+}
+
+ScistDoubletDPTraceback ::ScistDoubletDPTraceback(const ScistDoubletDPTraceback &rhs) : indexChild1(rhs.indexChild1), phaseChild1(rhs.phaseChild1), indexChild2(rhs.indexChild2), phaseChild2(rhs.phaseChild2)
+{
+}
+
+ScistDoubletDPTraceback &ScistDoubletDPTraceback ::operator=(const ScistDoubletDPTraceback &rhs)
+{
+    indexChild1 = rhs.indexChild1;
+    phaseChild1 = rhs.phaseChild1;
+    indexChild2 = rhs.indexChild2;
+    phaseChild2 = rhs.phaseChild2;
+    return *this;
 }
 
 // *************************************************************************************
 // Deal with doublet
 
-ScistDoublet ::ScistDoublet(const ScistGenGenotypeMat &genosInputIn)
-    : genosInput(genosInputIn) {}
-
-double ScistDoublet ::EvalGenoDoublet(const set<int> &setTemplateRows,
-                                      int genoDoublet,
-                                      vector<int> &genoDoublePhase1,
-                                      vector<int> &genoDoublePhase2) const {
-  // construct cluster trees
-  map<int, ScistPerfPhyCluster> setTemplateSites;
-  std::map<const ScistPerfPhyCluster *, int> mapClusToSiteIndex;
-  ConsClustersForTemplates(setTemplateRows, setTemplateSites,
-                           mapClusToSiteIndex);
-
-  ScistPerfPhyClusTreeNode *pClusTreeRoot =
-      ScistPerfPhyClusTreeNode::ConsClusterTree(setTemplateSites);
-
-  // construct solution based on this
-  std::map<ScistPerfPhyClusTreeNode *,
-           std::vector<std::pair<double, ScistDoubletDPTraceback> > >
-      mapNodeVals;
-  ConsDPTblDoubletNodes(setTemplateSites, mapClusToSiteIndex, genoDoublet,
-                        pClusTreeRoot, mapNodeVals);
-
-  //
-  double minCost = mapNodeVals[pClusTreeRoot][3].first;
-  // cout << "The min-cost phasing has optimal cost: " << minCost << endl;
-
-  vector<int> vecPhasing;
-  ConsPhasing(mapClusToSiteIndex, genoDoublet, pClusTreeRoot, mapNodeVals,
-              vecPhasing);
-  // cout << "Phasing vector: ";
-  // DumpIntVec( vecPhasing);
-
-  // now construct phasing
-  ConsPhasingVec(vecPhasing, genoDoublePhase1, genoDoublePhase2);
-
-  delete pClusTreeRoot;
-  return minCost;
+ScistDoublet ::ScistDoublet(const ScistGenGenotypeMat &genosInputIn) : genosInput(genosInputIn)
+{
 }
 
-void ScistDoublet ::ConsClustersForTemplates(
-    const set<int> &setTemplateRows,
-    std::map<int, ScistPerfPhyCluster> &setTemplateSites,
-    std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex) const {
-  // only use those rows
-  setTemplateSites.clear();
-
-  for (int s = 0; s < genosInput.GetNumSites(); ++s) {
-    set<int> rowsMut;
-    genosInput.GetMutRowsHapAtSite(s, rowsMut);
-    set<int> rowsMutInTemp;
-    JoinSets(rowsMut, setTemplateRows, rowsMutInTemp);
-
-    // ignore any singleton
-    if (rowsMutInTemp.size() == 0) {
-      continue;
+double ScistDoublet ::EvalGenoDoublet(const set<int> &setTemplateRows, int genoDoublet, vector<int> &genoDoublePhase1, vector<int> &genoDoublePhase2) const
+{
+    // construct cluster trees
+    map<int, ScistPerfPhyCluster> setTemplateSites;
+    std::map<const ScistPerfPhyCluster *, int> mapClusToSiteIndex;
+    ConsClustersForTemplates(setTemplateRows, setTemplateSites, mapClusToSiteIndex);
+
+    ScistPerfPhyClusTreeNode *pClusTreeRoot = ScistPerfPhyClusTreeNode::ConsClusterTree(setTemplateSites);
+
+    // construct solution based on this
+    std::map<ScistPerfPhyClusTreeNode *, std::vector<std::pair<double, ScistDoubletDPTraceback>>> mapNodeVals;
+    ConsDPTblDoubletNodes(setTemplateSites, mapClusToSiteIndex, genoDoublet, pClusTreeRoot, mapNodeVals);
+
+    //
+    double minCost = mapNodeVals[pClusTreeRoot][3].first;
+    //cout << "The min-cost phasing has optimal cost: " << minCost << endl;
+
+    vector<int> vecPhasing;
+    ConsPhasing(mapClusToSiteIndex, genoDoublet, pClusTreeRoot, mapNodeVals, vecPhasing);
+    //cout << "Phasing vector: ";
+    //DumpIntVec( vecPhasing);
+
+    // now construct phasing
+    ConsPhasingVec(vecPhasing, genoDoublePhase1, genoDoublePhase2);
+
+    delete pClusTreeRoot;
+    return minCost;
+}
+
+void ScistDoublet ::ConsClustersForTemplates(const set<int> &setTemplateRows, std::map<int, ScistPerfPhyCluster> &setTemplateSites, std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex) const
+{
+    // only use those rows
+    setTemplateSites.clear();
+
+    for (int s = 0; s < genosInput.GetNumSites(); ++s)
+    {
+        set<int> rowsMut;
+        genosInput.GetMutRowsHapAtSite(s, rowsMut);
+        set<int> rowsMutInTemp;
+        JoinSets(rowsMut, setTemplateRows, rowsMutInTemp);
+
+        // ignore any singleton
+        if (rowsMutInTemp.size() == 0)
+        {
+            continue;
+        }
+
+        ScistPerfPhyCluster clus(rowsMutInTemp);
+        setTemplateSites[s] = rowsMutInTemp;
+    }
+
+    // construct reverse mapping
+    for (map<int, ScistPerfPhyCluster>::iterator it = setTemplateSites.begin(); it != setTemplateSites.end(); ++it)
+    {
+        mapClusToSiteIndex[&(it->second)] = it->first;
     }
 
-    ScistPerfPhyCluster clus(rowsMutInTemp);
-    setTemplateSites[s] = rowsMutInTemp;
-  }
-
-  // construct reverse mapping
-  for (map<int, ScistPerfPhyCluster>::iterator it = setTemplateSites.begin();
-       it != setTemplateSites.end(); ++it) {
-    mapClusToSiteIndex[&(it->second)] = it->first;
-  }
-
-  // cout << "ConsClustersForTemplates: template rows\n";
-  // for( map<int, ScistPerfPhyCluster > :: iterator it =
-  // setTemplateSites.begin(); it != setTemplateSites.end(); ++it )
-  //{
-  // cout << "Site " << it->first << ": mut rows within template: ";
-  // it->second.Dump();
-  //}
+    //cout << "ConsClustersForTemplates: template rows\n";
+    //for( map<int, ScistPerfPhyCluster > :: iterator it = setTemplateSites.begin(); it != setTemplateSites.end(); ++it )
+    //{
+    //cout << "Site " << it->first << ": mut rows within template: ";
+    //it->second.Dump();
+    //}
 }
 
-void ScistDoublet ::ConsDPTblDoubletNodes(
-    const std::map<int, ScistPerfPhyCluster> &setTemplateSites,
-    const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex,
-    int genoDoublet, ScistPerfPhyClusTreeNode *pNodeCurr,
-    std::map<ScistPerfPhyClusTreeNode *,
-             vector<pair<double, ScistDoubletDPTraceback> > > &mapNodeVals)
-    const {
-  // cons DP table for doublet recursively from bottom up
-  //
-  const ScistPerfPhyCluster *pClus = pNodeCurr->GetClus();
+void ScistDoublet ::ConsDPTblDoubletNodes(const std::map<int, ScistPerfPhyCluster> &setTemplateSites, const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex, int genoDoublet, ScistPerfPhyClusTreeNode *pNodeCurr, std::map<ScistPerfPhyClusTreeNode *, vector<pair<double, ScistDoubletDPTraceback>>> &mapNodeVals) const
+{
+    // cons DP table for doublet recursively from bottom up
+    //
+    const ScistPerfPhyCluster *pClus = pNodeCurr->GetClus();
 
 #if 0
     // work with all sites
@@ -149,480 +136,508 @@ void ScistDoublet ::ConsDPTblDoubletNodes(
     }
     mapNodeVals[ pNodeCurr ] = vecThis;
 #endif
-  //#if 0
-  if (pClus != NULL) {
-    map<const ScistPerfPhyCluster *, int>::const_iterator it =
-        mapClusToSiteIndex.find(pClus);
-    YW_ASSERT_INFO(it != mapClusToSiteIndex.end(), "Fail to find the cluster2");
-    int site = it->second;
+    //#if 0
+    if (pClus != NULL)
+    {
+        map<const ScistPerfPhyCluster *, int>::const_iterator it = mapClusToSiteIndex.find(pClus);
+        YW_ASSERT_INFO(it != mapClusToSiteIndex.end(), "Fail to find the cluster2");
+        int site = it->second;
 
-    //
-    // double prob0 = this->genosInput.GetScoreForGeno( genoDoublet, site, 0 );
-    // double prob1 = this->genosInput.GetScoreForGeno( genoDoublet, site, 1 );
-    double prob0Orig =
-        this->genosInput.GetGenotypeProbAllele0At(genoDoublet, site);
-    double prob0 = -1.0 * log(prob0Orig);
-    double prob1 = -1.0 * log(1.0 - prob0Orig);
-    vector<pair<double, ScistDoubletDPTraceback> > vecThis(4);
-    vecThis[0].first = prob0;
-    vecThis[1].first = prob1;
-    vecThis[2].first = prob1;
-    vecThis[3].first = prob1;
-    mapNodeVals[pNodeCurr] = vecThis;
-  } else {
-    // otherwise everything is zero
-    vector<pair<double, ScistDoubletDPTraceback> > vecThis(4);
-    vecThis[0].first = 0.0;
-    vecThis[1].first = 0.0;
-    vecThis[2].first = 0.0;
-    vecThis[3].first = 0.0;
-    mapNodeVals[pNodeCurr] = vecThis;
-  }
-  //#endif
-
-  if (pNodeCurr->IsLeaf()) {
-    return;
-  }
-
-  // internal node: first construct all the descendents
-  for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i) {
-    ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
-    ConsDPTblDoubletNodes(setTemplateSites, mapClusToSiteIndex, genoDoublet,
-                          pChild, mapNodeVals);
-  }
-
-  // now setup the values for the current node
-  vector<pair<double, ScistDoubletDPTraceback> > vec;
-
-  // phasing 00
-  pair<double, ScistDoubletDPTraceback> mv00;
-  mv00.first = 0.0;
-  for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i) {
-    ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
-    mv00.first += mapNodeVals[pChild][0].first;
-  }
-  // use default traceback
-  vec.push_back(mv00);
-
-  // phasing 01
-  pair<double, ScistDoubletDPTraceback> mv01;
-  mv01.first = mv00.first;
-  for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i) {
-    ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+        //
+        //double prob0 = this->genosInput.GetScoreForGeno( genoDoublet, site, 0 );
+        //double prob1 = this->genosInput.GetScoreForGeno( genoDoublet, site, 1 );
+        double prob0Orig = this->genosInput.GetGenotypeProbAllele0At(genoDoublet, site);
+        double prob0 = -1.0 * log(prob0Orig);
+        double prob1 = -1.0 * log(1.0 - prob0Orig);
+        vector<pair<double, ScistDoubletDPTraceback>> vecThis(4);
+        vecThis[0].first = prob0;
+        vecThis[1].first = prob1;
+        vecThis[2].first = prob1;
+        vecThis[3].first = prob1;
+        mapNodeVals[pNodeCurr] = vecThis;
+    }
+    else
+    {
+        // otherwise everything is zero
+        vector<pair<double, ScistDoubletDPTraceback>> vecThis(4);
+        vecThis[0].first = 0.0;
+        vecThis[1].first = 0.0;
+        vecThis[2].first = 0.0;
+        vecThis[3].first = 0.0;
+        mapNodeVals[pNodeCurr] = vecThis;
+    }
+    //#endif
 
-    //
-    double mv01i = mv00.first - mapNodeVals[pChild][0].first +
-                   mapNodeVals[pChild][1].first;
-    if (mv01i < mv01.first) {
-      mv01.first = mv01i;
-      mv01.second.SetChild1(i);
-      mv01.second.SetPhase1(1);
+    if (pNodeCurr->IsLeaf())
+    {
+        return;
     }
-  }
-  vec.push_back(mv01);
 
-  // phasing 10
-  pair<double, ScistDoubletDPTraceback> mv10;
-  mv10.first = mv00.first;
-  for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i) {
-    ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+    // internal node: first construct all the descendents
+    for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i)
+    {
+        ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+        ConsDPTblDoubletNodes(setTemplateSites, mapClusToSiteIndex, genoDoublet, pChild, mapNodeVals);
+    }
 
-    //
-    double mv10i = mv00.first - mapNodeVals[pChild][0].first +
-                   mapNodeVals[pChild][2].first;
-    if (mv10i < mv10.first) {
-      mv10.first = mv10i;
-      mv10.second.SetChild1(i);
-      mv10.second.SetPhase1(2);
+    // now setup the values for the current node
+    vector<pair<double, ScistDoubletDPTraceback>> vec;
+
+    // phasing 00
+    pair<double, ScistDoubletDPTraceback> mv00;
+    mv00.first = 0.0;
+    for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i)
+    {
+        ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+        mv00.first += mapNodeVals[pChild][0].first;
     }
-  }
-  vec.push_back(mv10);
-
-  // phasing 11
-  pair<double, ScistDoubletDPTraceback> mv11;
-  mv11.first = std::min(mv01.first, mv10.first);
-  // setup trace back
-  if (mv11.first == mv01.first) {
-    mv11.second = mv01.second;
-  } else {
-    mv11.second = mv10.second;
-  }
-
-  // consider exatly one is 11
-  for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i) {
-    ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+    // use default traceback
+    vec.push_back(mv00);
 
-    //
-    double mv11i = mv00.first - mapNodeVals[pChild][0].first +
-                   mapNodeVals[pChild][3].first;
-    if (mv11i < mv11.first) {
-      mv11.first = mv11i;
-      mv11.second.SetChild1(i);
-      mv11.second.SetPhase1(3);
+    // phasing 01
+    pair<double, ScistDoubletDPTraceback> mv01;
+    mv01.first = mv00.first;
+    for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i)
+    {
+        ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+
+        //
+        double mv01i = mv00.first - mapNodeVals[pChild][0].first + mapNodeVals[pChild][1].first;
+        if (mv01i < mv01.first)
+        {
+            mv01.first = mv01i;
+            mv01.second.SetChild1(i);
+            mv01.second.SetPhase1(1);
+        }
     }
-  }
-  // consider a pair of i and j
-  for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i) {
-    ScistPerfPhyClusTreeNode *pChildi = pNodeCurr->GetChild(i);
-
-    for (int j = 0; j < pNodeCurr->GetNumChildren(); ++j) {
-      if (i == j) {
-        continue;
-      }
-
-      ScistPerfPhyClusTreeNode *pChildj = pNodeCurr->GetChild(j);
-
-      //
-      double mv11i = mv00.first - mapNodeVals[pChildi][0].first -
-                     mapNodeVals[pChildj][0].first +
-                     mapNodeVals[pChildi][1].first +
-                     mapNodeVals[pChildj][2].first;
-      if (mv11i < mv11.first) {
-        mv11.first = mv11i;
-        mv11.second.SetChild1(i);
-        mv11.second.SetPhase1(1);
-        mv11.second.SetChild2(j);
-        mv11.second.SetPhase2(2);
-      }
+    vec.push_back(mv01);
+
+    // phasing 10
+    pair<double, ScistDoubletDPTraceback> mv10;
+    mv10.first = mv00.first;
+    for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i)
+    {
+        ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+
+        //
+        double mv10i = mv00.first - mapNodeVals[pChild][0].first + mapNodeVals[pChild][2].first;
+        if (mv10i < mv10.first)
+        {
+            mv10.first = mv10i;
+            mv10.second.SetChild1(i);
+            mv10.second.SetPhase1(2);
+        }
     }
-  }
-  vec.push_back(mv11);
-
-  // add the current cost
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    vec[i].first += mapNodeVals[pNodeCurr][i].first;
-  }
-  mapNodeVals[pNodeCurr] = vec;
-}
+    vec.push_back(mv10);
 
-void ScistDoublet ::ConsPhasing(
-    const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex,
-    int genoDoublet, ScistPerfPhyClusTreeNode *pNodeRoot,
-    const std::map<ScistPerfPhyClusTreeNode *,
-                   std::vector<std::pair<double, ScistDoubletDPTraceback> > >
-        &mapNodeVals,
-    vector<int> &vecPhasing) const {
-  //
-  vecPhasing.resize(this->genosInput.GetNumSites());
-
-  // init all phasing to be 00 for genotype 0 and 01 for genotype 1
-  for (int i = 0; i < this->genosInput.GetNumSites(); ++i) {
-    int geno = this->genosInput.GetGenotypeAt(genoDoublet, i);
-    if (geno == 0) {
-      vecPhasing[i] = 0;
-    } else {
-      vecPhasing[i] = 1;
+    // phasing 11
+    pair<double, ScistDoubletDPTraceback> mv11;
+    mv11.first = std::min(mv01.first, mv10.first);
+    // setup trace back
+    if (mv11.first == mv01.first)
+    {
+        mv11.second = mv01.second;
+    }
+    else
+    {
+        mv11.second = mv10.second;
     }
-  }
-  const int ROOT_PHASING = 3;
-  TracePhasingAtNode(mapClusToSiteIndex, genoDoublet, pNodeRoot, ROOT_PHASING,
-                     mapNodeVals, vecPhasing);
-}
 
-void ScistDoublet ::TracePhasingAtNode(
-    const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex,
-    int genoDoublet, ScistPerfPhyClusTreeNode *pNodeCurr, int phasingCurr,
-    const std::map<ScistPerfPhyClusTreeNode *,
-                   std::vector<std::pair<double, ScistDoubletDPTraceback> > >
-        &mapNodeVals,
-    vector<int> &vecPhasing) const {
-  //
-  const ScistPerfPhyCluster *pClus = pNodeCurr->GetClus();
-  if (pClus != NULL) {
-    map<const ScistPerfPhyCluster *, int>::const_iterator it =
-        mapClusToSiteIndex.find(pClus);
-    YW_ASSERT_INFO(it != mapClusToSiteIndex.end(), "Fail to find the cluster2");
-    int site = it->second;
-
-    // record this phasing
-    vecPhasing[site] = phasingCurr;
-  }
-
-  // consider all children
-  std::map<ScistPerfPhyClusTreeNode *,
-           std::vector<std::pair<double, ScistDoubletDPTraceback> > >::
-      const_iterator it = mapNodeVals.find(pNodeCurr);
-  YW_ASSERT_INFO(it != mapNodeVals.end(), "Fail to find");
-  for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i) {
-    ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
-    int phasingChild = 0;
-    if (it->second[phasingCurr].second.GetChild1() == i) {
-      phasingChild = it->second[phasingCurr].second.GetPhase1();
-    } else if (it->second[phasingCurr].second.GetChild2() == i) {
-      phasingChild = it->second[phasingCurr].second.GetPhase2();
+    // consider exatly one is 11
+    for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i)
+    {
+        ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+
+        //
+        double mv11i = mv00.first - mapNodeVals[pChild][0].first + mapNodeVals[pChild][3].first;
+        if (mv11i < mv11.first)
+        {
+            mv11.first = mv11i;
+            mv11.second.SetChild1(i);
+            mv11.second.SetPhase1(3);
+        }
     }
-    TracePhasingAtNode(mapClusToSiteIndex, genoDoublet, pChild, phasingChild,
-                       mapNodeVals, vecPhasing);
-  }
-}
+    // consider a pair of i and j
+    for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i)
+    {
+        ScistPerfPhyClusTreeNode *pChildi = pNodeCurr->GetChild(i);
+
+        for (int j = 0; j < pNodeCurr->GetNumChildren(); ++j)
+        {
+            if (i == j)
+            {
+                continue;
+            }
 
-void ScistDoublet ::ConsPhasingVec(const std::vector<int> &vecPhasing,
-                                   std::vector<int> &genoDoublePhase1,
-                                   std::vector<int> &genoDoublePhase2) const {
-  //
-  genoDoublePhase1.clear();
-  genoDoublePhase2.clear();
-  for (int i = 0; i < (int)vecPhasing.size(); ++i) {
-    int p = vecPhasing[i];
-    int a1, a2;
-    if (p == 0) {
-      a1 = 0;
-      a2 = 0;
-    } else if (p == 1) {
-      a1 = 0;
-      a2 = 1;
-    } else if (p == 2) {
-      a1 = 1;
-      a2 = 0;
-    } else {
-      a1 = 1;
-      a2 = 1;
+            ScistPerfPhyClusTreeNode *pChildj = pNodeCurr->GetChild(j);
+
+            //
+            double mv11i = mv00.first - mapNodeVals[pChildi][0].first - mapNodeVals[pChildj][0].first + mapNodeVals[pChildi][1].first + mapNodeVals[pChildj][2].first;
+            if (mv11i < mv11.first)
+            {
+                mv11.first = mv11i;
+                mv11.second.SetChild1(i);
+                mv11.second.SetPhase1(1);
+                mv11.second.SetChild2(j);
+                mv11.second.SetPhase2(2);
+            }
+        }
     }
-    genoDoublePhase1.push_back(a1);
-    genoDoublePhase2.push_back(a2);
-  }
-}
+    vec.push_back(mv11);
 
-// *************************************************************************************
-// Deal with doublet (search)
+    // add the current cost
+    for (int i = 0; i < (int)vec.size(); ++i)
+    {
+        vec[i].first += mapNodeVals[pNodeCurr][i].first;
+    }
+    mapNodeVals[pNodeCurr] = vec;
+}
 
-const double DEF_DOUBLET_COST = 0.0;
+void ScistDoublet ::ConsPhasing(const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex, int genoDoublet, ScistPerfPhyClusTreeNode *pNodeRoot, const std::map<ScistPerfPhyClusTreeNode *, std::vector<std::pair<double, ScistDoubletDPTraceback>>> &mapNodeVals, vector<int> &vecPhasing) const
+{
+    //
+    vecPhasing.resize(this->genosInput.GetNumSites());
 
-ScistDoubletSearch ::ScistDoubletSearch(const ScistGenGenotypeMat &genosInputIn,
-                                        int maxDoubletSubsetSzIn)
-    : genosInput(genosInputIn), maxDoubletSubsetSz(maxDoubletSubsetSzIn),
-      costDoublet(DEF_DOUBLET_COST), fVerbose(false),
-      fOutputPPWithEdgeLabels(false) {}
-
-void ScistDoubletSearch ::Search() {
-  // cout << "Matrix: ";
-  // this->genosInput.Dump();
-  set<int> setCandidates;
-  FindDoubletCandidates(setCandidates);
-  // cout << "Candidates: ";
-  // DumpIntSet(setCandidates);
-  int szDoublets = this->maxDoubletSubsetSz;
-  if (szDoublets > (int)setCandidates.size()) {
-    szDoublets = (int)setCandidates.size();
-  }
-  YW_ASSERT_INFO(szDoublets > 0, "Wrong: no doublets to work with. Consider "
-                                 "run without specifying doublets");
-
-  // try all subset up to a level
-  double opt = HAP_MAX_INT * 1.0;
-  ScistGenGenotypeMat *pMatRes = NULL;
-  for (int szDoubletsStep = 0; szDoubletsStep <= szDoublets; ++szDoubletsStep) {
-    vector<int> posvec;
-    GetFirstCombo(szDoubletsStep, (int)setCandidates.size(), posvec);
-    while (true) {
-      // now work with the chosen subset
-      set<int> rowsDoubles;
-      PopulateSetByVec(rowsDoubles, posvec);
-      // cout << "Processing doublets: ";
-      // DumpIntSet(rowsDoubles);
-      //
-      double optStep = 0.0;
-      ScistGenGenotypeMat *pMatStep =
-          EvalGenoDoubletSet(this->genosInput, rowsDoubles, optStep);
-      YW_ASSERT_INFO(pMatStep != NULL, "Canot be null");
-      // cout << "optStep: " << optStep << endl;
-      if (optStep < opt) {
-        // cout << "BETTER\n";
-        opt = optStep;
-        if (pMatRes != NULL) {
-          delete pMatRes;
+    // init all phasing to be 00 for genotype 0 and 01 for genotype 1
+    for (int i = 0; i < this->genosInput.GetNumSites(); ++i)
+    {
+        int geno = this->genosInput.GetGenotypeAt(genoDoublet, i);
+        if (geno == 0)
+        {
+            vecPhasing[i] = 0;
+        }
+        else
+        {
+            vecPhasing[i] = 1;
         }
-        pMatRes = pMatStep;
-      } else {
-        delete pMatStep;
-      }
-
-      if (GetNextCombo(szDoubletsStep, (int)setCandidates.size(), posvec) ==
-          false) {
-        break;
-      }
     }
-  }
-  YW_ASSERT_INFO(pMatRes != NULL, "Resulting matrix: not found");
-  cout << "**** Optimal cost for doublet resoultion: " << opt << endl;
-  if (fVerbose) {
-    pMatRes->OutputImput();
-  }
-  string strTree = pMatRes->ConsTree();
-  cout << "Constructed single cell phylogeny: " << strTree << endl;
-
-  if (this->fVerbose) {
-    // keep track of imputation results
-    ScistGenGenotypeMat *pMatImpute = genosInput.Copy();
-    std::map<int, std::set<int> > mapDoublets;
-    FindOrigImputedGeno(*pMatRes, *pMatImpute, mapDoublets);
+    const int ROOT_PHASING = 3;
+    TracePhasingAtNode(mapClusToSiteIndex, genoDoublet, pNodeRoot, ROOT_PHASING, mapNodeVals, vecPhasing);
+}
 
+void ScistDoublet ::TracePhasingAtNode(const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex, int genoDoublet, ScistPerfPhyClusTreeNode *pNodeCurr, int phasingCurr, const std::map<ScistPerfPhyClusTreeNode *, std::vector<std::pair<double, ScistDoubletDPTraceback>>> &mapNodeVals, vector<int> &vecPhasing) const
+{
     //
-    cout << "Doublet genotypes (1-based)): <original dobule genotype> : <list "
-            "of expanded doublet rows in imputed matrix>\n";
-    for (map<int, set<int> >::iterator it = mapDoublets.begin();
-         it != mapDoublets.end(); ++it) {
-      cout << it->first << " : ";
-      for (set<int>::const_iterator it2 = it->second.begin();
-           it2 != it->second.end(); ++it2) {
-        cout << *it2 + 1 << " ";
-      }
-      cout << endl;
+    const ScistPerfPhyCluster *pClus = pNodeCurr->GetClus();
+    if (pClus != NULL)
+    {
+        map<const ScistPerfPhyCluster *, int>::const_iterator it = mapClusToSiteIndex.find(pClus);
+        YW_ASSERT_INFO(it != mapClusToSiteIndex.end(), "Fail to find the cluster2");
+        int site = it->second;
+
+        // record this phasing
+        vecPhasing[site] = phasingCurr;
     }
 
-    // also output the imputaton results
-    cout << "Imputed genotypes: \n";
-    pMatImpute->OutputImput();
-
-    set<pair<pair<int, int>, int> > listChangedPlaces;
-    for (int i = 0; i < genosInput.GetNumHaps(); ++i) {
-      for (int j = 0; j < genosInput.GetNumSites(); ++j) {
-        if (genosInput.GetGenotypeAt(i, j) != pMatImpute->GetGenotypeAt(i, j)) {
-          pair<int, int> pp(i, j);
-          pair<pair<int, int>, int> pp1(pp, pMatImpute->GetGenotypeAt(i, j));
-          listChangedPlaces.insert(pp1);
+    // consider all children
+    std::map<ScistPerfPhyClusTreeNode *, std::vector<std::pair<double, ScistDoubletDPTraceback>>>::const_iterator it = mapNodeVals.find(pNodeCurr);
+    YW_ASSERT_INFO(it != mapNodeVals.end(), "Fail to find");
+    for (int i = 0; i < pNodeCurr->GetNumChildren(); ++i)
+    {
+        ScistPerfPhyClusTreeNode *pChild = pNodeCurr->GetChild(i);
+        int phasingChild = 0;
+        if (it->second[phasingCurr].second.GetChild1() == i)
+        {
+            phasingChild = it->second[phasingCurr].second.GetPhase1();
+        }
+        else if (it->second[phasingCurr].second.GetChild2() == i)
+        {
+            phasingChild = it->second[phasingCurr].second.GetPhase2();
         }
-      }
+        TracePhasingAtNode(mapClusToSiteIndex, genoDoublet, pChild, phasingChild, mapNodeVals, vecPhasing);
     }
-    cout << "List of corrected genotypes (site, cell, new genotype) in base-1: "
-            "\n";
-    for (set<pair<pair<int, int>, int> >::iterator it =
-             listChangedPlaces.begin();
-         it != listChangedPlaces.end(); ++it) {
-      cout << "[ " << setw(6) << it->first.second + 1 << " " << setw(6)
-           << it->first.first + 1 << " ]: " << it->second << endl;
+}
+
+void ScistDoublet ::ConsPhasingVec(const std::vector<int> &vecPhasing, std::vector<int> &genoDoublePhase1, std::vector<int> &genoDoublePhase2) const
+{
+    //
+    genoDoublePhase1.clear();
+    genoDoublePhase2.clear();
+    for (int i = 0; i < (int)vecPhasing.size(); ++i)
+    {
+        int p = vecPhasing[i];
+        int a1, a2;
+        if (p == 0)
+        {
+            a1 = 0;
+            a2 = 0;
+        }
+        else if (p == 1)
+        {
+            a1 = 0;
+            a2 = 1;
+        }
+        else if (p == 2)
+        {
+            a1 = 1;
+            a2 = 0;
+        }
+        else
+        {
+            a1 = 1;
+            a2 = 1;
+        }
+        genoDoublePhase1.push_back(a1);
+        genoDoublePhase2.push_back(a2);
     }
+}
 
-    delete pMatImpute;
-  }
+// *************************************************************************************
+// Deal with doublet (search)
 
-  delete pMatRes;
+const double DEF_DOUBLET_COST = 0.0;
+
+ScistDoubletSearch ::ScistDoubletSearch(const ScistGenGenotypeMat &genosInputIn, int maxDoubletSubsetSzIn) : genosInput(genosInputIn), maxDoubletSubsetSz(maxDoubletSubsetSzIn), costDoublet(DEF_DOUBLET_COST), fVerbose(false), fOutputPPWithEdgeLabels(false)
+{
 }
 
-void ScistDoubletSearch ::SearchInc() {
-  // search incrementally for doublets
-  ScistGenGenotypeMat *pMatRes = this->genosInput.Copy();
-  double optFinal = 1.0 * HAP_MAX_INT;
-  bool fInit = false;
+void ScistDoubletSearch ::Search()
+{
+    //cout << "Matrix: ";
+    //this->genosInput.Dump();
+    set<int> setCandidates;
+    FindDoubletCandidates(setCandidates);
+    //cout << "Candidates: ";
+    //DumpIntSet(setCandidates);
+    int szDoublets = this->maxDoubletSubsetSz;
+    if (szDoublets > (int)setCandidates.size())
+    {
+        szDoublets = (int)setCandidates.size();
+    }
+    YW_ASSERT_INFO(szDoublets > 0, "Wrong: no doublets to work with. Consider run without specifying doublets");
 
-  int numDoublesUsed = 0;
-  while (numDoublesUsed < this->maxDoubletSubsetSz) {
+    // try all subset up to a level
     double opt = HAP_MAX_INT * 1.0;
-    set<int> rowsDoublesEmpty;
-    ScistGenGenotypeMat *pMatInitDump =
-        EvalGenoDoubletSet(*pMatRes, rowsDoublesEmpty, opt);
-    YW_ASSERT_INFO(pMatInitDump != NULL, "Cannot be null");
-    // cout << "pMatInitDump: ";
-    // pMatInitDump->Dump();
-    // ScistHaplotypeMat *pMatResHap0 = dynamic_cast<ScistHaplotypeMat
-    // *>(pMatInitDump); string strTreeEdgeLabel0 =
-    // ConsRootedPerfectPhylogenyFromMat(pMatResHap0->GetHapMat(), true, true);
-    // cout << "Stepwise tree: " << strTreeEdgeLabel0 << endl;
-    delete pMatInitDump;
-
-    if (fInit == false) {
-      fInit = true;
-      optFinal = opt;
-    }
+    ScistGenGenotypeMat *pMatRes = NULL;
+    for (int szDoubletsStep = 0; szDoubletsStep <= szDoublets; ++szDoubletsStep)
+    {
+        vector<int> posvec;
+        GetFirstCombo(szDoubletsStep, (int)setCandidates.size(), posvec);
+        while (true)
+        {
+            // now work with the chosen subset
+            set<int> rowsDoubles;
+            PopulateSetByVec(rowsDoubles, posvec);
+            //cout << "Processing doublets: ";
+            //DumpIntSet(rowsDoubles);
+            //
+            double optStep = 0.0;
+            ScistGenGenotypeMat *pMatStep = EvalGenoDoubletSet(this->genosInput, rowsDoubles, optStep);
+            YW_ASSERT_INFO(pMatStep != NULL, "Canot be null");
+            //cout << "optStep: " << optStep << endl;
+            if (optStep < opt)
+            {
+                //cout << "BETTER\n";
+                opt = optStep;
+                if (pMatRes != NULL)
+                {
+                    delete pMatRes;
+                }
+                pMatRes = pMatStep;
+            }
+            else
+            {
+                delete pMatStep;
+            }
 
-    // cout << "Finding doublet: opt=" << opt << ", num of doublet so far: " <<
-    // numDoublesUsed+1 << ", current matrix: "; pMatRes->Dump();
-    // try to find the best single doublet row to expand
-    double optLoop = HAP_MAX_INT * 1.0;
-    ScistGenGenotypeMat *pMatLoop = NULL;
-    int indexDouble = -1;
-    for (int i = 0; i < pMatRes->GetNumHaps(); ++i) {
-      // cout << "i = " << i << endl;
-      // now work with the chosen subset
-      set<int> rowsDoubles;
-      rowsDoubles.insert(i);
-      //
-      double optStep = 0.0;
-      ScistGenGenotypeMat *pMatStep =
-          EvalGenoDoubletSet(*pMatRes, rowsDoubles, optStep);
-      if (pMatStep != NULL) {
-        // cout << "Stepwise matrix: ";
-        // pMatStep->Dump();
-        // ScistHaplotypeMat *pMatResHap = dynamic_cast<ScistHaplotypeMat
-        // *>(pMatStep); string strTreeEdgeLabel1 =
-        // ConsRootedPerfectPhylogenyFromMat(pMatResHap->GetHapMat(), true,
-        // true); cout << "Stepwise tree: " << strTreeEdgeLabel1 << endl; cout
-        // << "for genotype: " << i << ", optStep: " << optStep << endl;
-        if (optStep < optLoop) {
-          // cout << "BETTER\n";
-          optLoop = optStep;
-          if (pMatLoop != NULL) {
-            delete pMatLoop;
-          }
-          pMatLoop = pMatStep;
-          indexDouble = i;
-        } else {
-          delete pMatStep;
+            if (GetNextCombo(szDoubletsStep, (int)setCandidates.size(), posvec) == false)
+            {
+                break;
+            }
         }
-      }
-    }
-    if (indexDouble < 0) {
-      break;
     }
-    if (optLoop >= opt) {
-      // YW: 08/22/18, now force to have the same number of doublets
-      // break;
-    }
-    if (pMatLoop == NULL) {
-      break;
+    YW_ASSERT_INFO(pMatRes != NULL, "Resulting matrix: not found");
+    cout << "**** Optimal cost for doublet resoultion: " << opt << endl;
+    if (fVerbose)
+    {
+        pMatRes->OutputImput();
     }
+    string strTree = pMatRes->ConsTree();
+    cout << "Constructed single cell phylogeny: " << strTree << endl;
 
-    opt = optLoop;
-    optFinal = optLoop;
-    YW_ASSERT_INFO(pMatLoop != NULL, "Cannot be null");
-    YW_ASSERT_INFO(indexDouble >= 0, "Wrong");
-    // cout << "pMatLoop: ";
-    // pMatLoop->Dump();
-    ScistGenGenotypeMat *pMatLoopConv =
-        CreateGnoesWithDouble(*pMatRes, indexDouble, *pMatLoop);
-    // cout << "Converted matrix: ";
-    // pMatLoopConv->Dump();
-
-    delete pMatLoop;
-
-    if (IsOverImpute(*pMatLoopConv) == true) {
-      delete pMatLoopConv;
-      break;
-    }
+    if (this->fVerbose)
+    {
+        // keep track of imputation results
+        ScistGenGenotypeMat *pMatImpute = genosInput.Copy();
+        std::map<int, std::set<int>> mapDoublets;
+        FindOrigImputedGeno(*pMatRes, *pMatImpute, mapDoublets);
+
+        //
+        cout << "Doublet genotypes (1-based)): <original dobule genotype> : <list of expanded doublet rows in imputed matrix>\n";
+        for (map<int, set<int>>::iterator it = mapDoublets.begin(); it != mapDoublets.end(); ++it)
+        {
+            cout << it->first << " : ";
+            for (set<int>::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
+            {
+                cout << *it2 + 1 << " ";
+            }
+            cout << endl;
+        }
+
+        // also output the imputaton results
+        cout << "Imputed genotypes: \n";
+        pMatImpute->OutputImput();
+
+        set<pair<pair<int, int>, int>> listChangedPlaces;
+        for (int i = 0; i < genosInput.GetNumHaps(); ++i)
+        {
+            for (int j = 0; j < genosInput.GetNumSites(); ++j)
+            {
+                if (genosInput.GetGenotypeAt(i, j) != pMatImpute->GetGenotypeAt(i, j))
+                {
+                    pair<int, int> pp(i, j);
+                    pair<pair<int, int>, int> pp1(pp, pMatImpute->GetGenotypeAt(i, j));
+                    listChangedPlaces.insert(pp1);
+                }
+            }
+        }
+        cout << "List of corrected genotypes (site, cell, new genotype) in base-1: \n";
+        for (set<pair<pair<int, int>, int>>::iterator it = listChangedPlaces.begin(); it != listChangedPlaces.end(); ++it)
+        {
+            cout << "[ " << setw(6) << it->first.second + 1 << " " << setw(6) << it->first.first + 1 << " ]: " << it->second << endl;
+        }
 
-    if (pMatRes != NULL) {
-      delete pMatRes;
+        delete pMatImpute;
     }
-    pMatRes = pMatLoopConv;
 
-    ++numDoublesUsed;
-  }
+    delete pMatRes;
+}
 
-  YW_ASSERT_INFO(pMatRes != NULL, "Resulting matrix: not found");
-  cout << "**** Optimal cost for doublet resoultion: " << optFinal << endl;
-  if (fVerbose) {
-    pMatRes->OutputImput();
+void ScistDoubletSearch ::SearchInc()
+{
+    // search incrementally for doublets
+    ScistGenGenotypeMat *pMatRes = this->genosInput.Copy();
+    double optFinal = 1.0 * HAP_MAX_INT;
+    bool fInit = false;
 
-    // analyze doublets
-    int numDoublets = 0;
-    for (int h = 0; h < pMatRes->GetNumHaps(); ++h) {
-      string strName = pMatRes->GetGenotypeName(h);
-      string strLastChar = strName.substr(strName.length() - 1, 1);
-      if (strLastChar == "'") {
-        //
-        string strNameOrig = GetGenoDoubleRowName(strName);
-        cout << "Doublet: imputed haplotype " << h + 1
-             << " (with assigned name " << strName
-             << ") is a doublet from cell " << strNameOrig << endl;
-        ++numDoublets;
-      }
+    int numDoublesUsed = 0;
+    while (numDoublesUsed < this->maxDoubletSubsetSz)
+    {
+        double opt = HAP_MAX_INT * 1.0;
+        set<int> rowsDoublesEmpty;
+        ScistGenGenotypeMat *pMatInitDump = EvalGenoDoubletSet(*pMatRes, rowsDoublesEmpty, opt);
+        YW_ASSERT_INFO(pMatInitDump != NULL, "Cannot be null");
+        //cout << "pMatInitDump: ";
+        //pMatInitDump->Dump();
+        //ScistHaplotypeMat *pMatResHap0 = dynamic_cast<ScistHaplotypeMat *>(pMatInitDump);
+        //string strTreeEdgeLabel0 = ConsRootedPerfectPhylogenyFromMat(pMatResHap0->GetHapMat(), true, true);
+        //cout << "Stepwise tree: " << strTreeEdgeLabel0 << endl;
+        delete pMatInitDump;
+
+        if (fInit == false)
+        {
+            fInit = true;
+            optFinal = opt;
+        }
+
+        //cout << "Finding doublet: opt=" << opt << ", num of doublet so far: " << numDoublesUsed+1 << ", current matrix: ";
+        //pMatRes->Dump();
+        // try to find the best single doublet row to expand
+        double optLoop = HAP_MAX_INT * 1.0;
+        ScistGenGenotypeMat *pMatLoop = NULL;
+        int indexDouble = -1;
+        for (int i = 0; i < pMatRes->GetNumHaps(); ++i)
+        {
+            //cout << "i = " << i << endl;
+            // now work with the chosen subset
+            set<int> rowsDoubles;
+            rowsDoubles.insert(i);
+            //
+            double optStep = 0.0;
+            ScistGenGenotypeMat *pMatStep = EvalGenoDoubletSet(*pMatRes, rowsDoubles, optStep);
+            if (pMatStep != NULL)
+            {
+                //cout << "Stepwise matrix: ";
+                //pMatStep->Dump();
+                //ScistHaplotypeMat *pMatResHap = dynamic_cast<ScistHaplotypeMat *>(pMatStep);
+                //string strTreeEdgeLabel1 = ConsRootedPerfectPhylogenyFromMat(pMatResHap->GetHapMat(), true, true);
+                //cout << "Stepwise tree: " << strTreeEdgeLabel1 << endl;
+                //cout << "for genotype: " << i << ", optStep: " << optStep << endl;
+                if (optStep < optLoop)
+                {
+                    //cout << "BETTER\n";
+                    optLoop = optStep;
+                    if (pMatLoop != NULL)
+                    {
+                        delete pMatLoop;
+                    }
+                    pMatLoop = pMatStep;
+                    indexDouble = i;
+                }
+                else
+                {
+                    delete pMatStep;
+                }
+            }
+        }
+        if (indexDouble < 0)
+        {
+            break;
+        }
+        if (optLoop >= opt)
+        {
+            // YW: 08/22/18, now force to have the same number of doublets
+            //break;
+        }
+        if (pMatLoop == NULL)
+        {
+            break;
+        }
+
+        opt = optLoop;
+        optFinal = optLoop;
+        YW_ASSERT_INFO(pMatLoop != NULL, "Cannot be null");
+        YW_ASSERT_INFO(indexDouble >= 0, "Wrong");
+        //cout << "pMatLoop: ";
+        //pMatLoop->Dump();
+        ScistGenGenotypeMat *pMatLoopConv = CreateGnoesWithDouble(*pMatRes, indexDouble, *pMatLoop);
+        //cout << "Converted matrix: ";
+        //pMatLoopConv->Dump();
+
+        delete pMatLoop;
+
+        if (IsOverImpute(*pMatLoopConv) == true)
+        {
+            delete pMatLoopConv;
+            break;
+        }
+
+        if (pMatRes != NULL)
+        {
+            delete pMatRes;
+        }
+        pMatRes = pMatLoopConv;
+
+        ++numDoublesUsed;
+    }
+
+    YW_ASSERT_INFO(pMatRes != NULL, "Resulting matrix: not found");
+    cout << "**** Optimal cost for doublet resoultion: " << optFinal << endl;
+    if (fVerbose)
+    {
+        pMatRes->OutputImput();
+
+        // analyze doublets
+        int numDoublets = 0;
+        for (int h = 0; h < pMatRes->GetNumHaps(); ++h)
+        {
+            string strName = pMatRes->GetGenotypeName(h);
+            string strLastChar = strName.substr(strName.length() - 1, 1);
+            if (strLastChar == "'")
+            {
+                //
+                string strNameOrig = GetGenoDoubleRowName(strName);
+                cout << "Doublet: imputed haplotype " << h + 1 << " (with assigned name " << strName << ") is a doublet from cell " << strNameOrig << endl;
+                ++numDoublets;
+            }
+        }
+        cout << "Number of found doublets: " << numDoublets << endl;
     }
-    cout << "Number of found doublets: " << numDoublets << endl;
-  }
-  if (fOutputPPWithEdgeLabels) {
-    // cout << "Imputed genotypes: ";
-    // pMatRes->Dump();
-    OutputMutTree(*pMatRes);
+    if (fOutputPPWithEdgeLabels)
+    {
+        //cout << "Imputed genotypes: ";
+        //pMatRes->Dump();
+        OutputMutTree(*pMatRes);
 
 #if 0
         ScistHaplotypeMat *pMatResHap = dynamic_cast<ScistHaplotypeMat *>(pMatRes);
@@ -641,15 +656,15 @@ void ScistDoubletSearch ::SearchInc() {
             OutputMutationTree( this->strMutTreeFileName.c_str(), strMutTreeConv, true );
         }
 #endif
-  }
+    }
 
-  // string strTree = pMatRes->ConsTree();
-  // cout << "Constructed single cell phylogeny: " << strTree << endl;
-  string strNW;
-  double likeliOpt = ConsTree(*pMatRes, strNW);
-  // cout << "Optimal log-likelihood is " << likeliOpt << endl;
-  cout << "**** Maximum log-likelihood: " << likeliOpt << endl;
-  cout << "Constructed single cell phylogeny: " << strNW << endl;
+    //string strTree = pMatRes->ConsTree();
+    //cout << "Constructed single cell phylogeny: " << strTree << endl;
+    string strNW;
+    double likeliOpt = ConsTree(*pMatRes, strNW);
+    //cout << "Optimal log-likelihood is " << likeliOpt << endl;
+    cout << "**** Maximum log-likelihood: " << likeliOpt << endl;
+    cout << "Constructed single cell phylogeny: " << strNW << endl;
 
 #if 0
     if( this->fVerbose )
@@ -698,181 +713,194 @@ void ScistDoubletSearch ::SearchInc() {
     }
 #endif
 
-  delete pMatRes;
+    delete pMatRes;
 }
 
-double ScistDoubletSearch ::ConsTree(ScistGenGenotypeMat &genosNoDoublets,
-                                     std::string &strNW) const {
-  //
-  ScistPerfPhyMLE sciInf1(genosNoDoublets);
-  sciInf1.SetOutput(false);
-  sciInf1.SetVerbose(false);
-  std::set<std::pair<std::pair<int, int>, int> > listChangedPlaces;
-  std::string strTreeNW;
-  double opt = sciInf1.Infer(&listChangedPlaces, &strTreeNW);
-  // cout << "Before mapping: inferred tree is " << strTreeNW << endl;
-  // now remap
-  map<string, string> mapIdToOrig;
-  for (int h = 0; h < genosNoDoublets.GetNumHaps(); ++h) {
-    string idCur = std::to_string(h + 1);
-    string idMapped = genosNoDoublets.GetGenotypeName(h);
-    mapIdToOrig[idCur] = idMapped;
-    // cout << idCur << " mapped to " << idMapped << endl;
-  }
-  strNW = strTreeNW;
-  NewickUtils::UpdateLabells(strNW, mapIdToOrig);
-  // cout << "After mapping, inferred tree is: " << strNW << endl;
-  return opt;
+double ScistDoubletSearch ::ConsTree(ScistGenGenotypeMat &genosNoDoublets, std::string &strNW) const
+{
+    // Run ScistPerfPhyMLE on genosNoDoublets (its output and verbose flags switched off), store the relabeled tree string in strNW, and return the value produced by Infer().
+    ScistPerfPhyMLE sciInf1(genosNoDoublets);
+    sciInf1.SetOutput(false);
+    sciInf1.SetVerbose(false);
+    std::set<std::pair<std::pair<int, int>, int>> listChangedPlaces; // handed to Infer(); not read again in this function
+    std::string strTreeNW;
+    double opt = sciInf1.Infer(&listChangedPlaces, &strTreeNW);
+    //cout << "Before mapping: inferred tree is " << strTreeNW << endl;
+    // now remap: build a map from each 1-based row index (as a string) to that row's genotype name
+    map<string, string> mapIdToOrig;
+    for (int h = 0; h < genosNoDoublets.GetNumHaps(); ++h)
+    {
+        string idCur = std::to_string(h + 1);
+        string idMapped = genosNoDoublets.GetGenotypeName(h);
+        mapIdToOrig[idCur] = idMapped;
+        //cout << idCur << " mapped to " << idMapped << endl;
+    }
+    strNW = strTreeNW;
+    NewickUtils::UpdateLabells(strNW, mapIdToOrig); // presumably replaces the index labels in the tree string with the mapped names - confirm against NewickUtils
+    //cout << "After mapping, inferred tree is: " << strNW << endl;
+    return opt;
 }
 
-static string GetNonDoubleName(const string &strTaxon) {
-  int posLast = (int)strTaxon.length() - 1;
-  while (posLast >= 0) {
-    string str = strTaxon.substr(posLast, 1);
-    if (str == "'") {
-      break;
+static string GetNonDoubleName(const string &strTaxon)
+{
+    int posLast = (int)strTaxon.length() - 1; // start at the last character and scan backwards for an apostrophe
+    while (posLast >= 0)
+    {
+        string str = strTaxon.substr(posLast, 1);
+        if (str == "'")
+        {
+            break;
+        }
+        --posLast;
+    }
-    --posLast;
-  }
-  //
-  YW_ASSERT_INFO(posLast >= 0, "Fail111");
-  return strTaxon.substr(0, posLast + 1);
+    // posLast is now the index of the last "'" in strTaxon, or -1 when there is none (rejected by the assert below). NOTE(review): the returned prefix keeps that "'" itself, despite the function name - confirm this is intended.
+    YW_ASSERT_INFO(posLast >= 0, "Fail111");
+    return strTaxon.substr(0, posLast + 1);
 }
 
-bool ScistDoubletSearch ::IsOverImpute(
-    const ScistGenGenotypeMat &genosDbl) const {
-  // simple rule: if it use the same row again, then it overimputes
-  for (int h = 0; h < genosDbl.GetNumHaps(); ++h) {
-    string strName = genosDbl.GetGenotypeName(h);
-    string strLastChar = strName.substr(strName.length() - 1, 1);
-    string str2ndLastChar;
-    if (strName.length() >= 2) {
-      str2ndLastChar = strName.substr(strName.length() - 2, 1);
-    }
-    if (strLastChar == "'" && str2ndLastChar == "'") {
-      //
-      return true;
+bool ScistDoubletSearch ::IsOverImpute(const ScistGenGenotypeMat &genosDbl) const
+{
+    // Returns true when some row name in genosDbl ends with two consecutive "'" characters, false otherwise. Simple rule: if it uses the same row again, then it overimputes.
+    for (int h = 0; h < genosDbl.GetNumHaps(); ++h)
+    {
+        string strName = genosDbl.GetGenotypeName(h);
+        string strLastChar = strName.substr(strName.length() - 1, 1); // NOTE(review): for an empty name, length() - 1 wraps around and substr() throws std::out_of_range - confirm names are never empty
+        string str2ndLastChar;
+        if (strName.length() >= 2)
+        {
+            str2ndLastChar = strName.substr(strName.length() - 2, 1);
+        }
+        if (strLastChar == "'" && str2ndLastChar == "'")
+        {
+            // name ends in "''": presumably a row that was already expanded as a doublet has been expanded again
+            return true;
+        }
     }
-  }
-  return false;
+    return false;
 }
 
-void ScistDoubletSearch ::FindDoubletHapsInMat(
-    const ScistGenGenotypeMat &genosDbl, std::set<int> &setHapsDoubles) const {
-  //
-  setHapsDoubles.clear();
-  set<string> setDoubles;
-  for (int h = 0; h < genosDbl.GetNumHaps(); ++h) {
-    string strName = genosDbl.GetGenotypeName(h);
-    string strLastChar = strName.substr(strName.length() - 1, 1);
-    if (strLastChar == "'") {
-      //
-      string strNameOrig = GetGenoDoubleRowName(strName);
-      setDoubles.insert(strNameOrig);
-      setHapsDoubles.insert(h);
+void ScistDoubletSearch ::FindDoubletHapsInMat(const ScistGenGenotypeMat &genosDbl, std::set<int> &setHapsDoubles) const
+{
+    // Fills setHapsDoubles (cleared first) with the indices of rows whose name ends in "'" plus the rows whose name equals the base name of such a row.
+    setHapsDoubles.clear();
+    set<string> setDoubles;
+    for (int h = 0; h < genosDbl.GetNumHaps(); ++h)
+    {
+        string strName = genosDbl.GetGenotypeName(h);
+        string strLastChar = strName.substr(strName.length() - 1, 1); // NOTE(review): throws std::out_of_range if a name is empty - confirm names are never empty
+        if (strLastChar == "'")
+        {
+            // pass 1: row name ends in "'"; remember its base name (trailing "'" stripped) and record the row itself
+            string strNameOrig = GetGenoDoubleRowName(strName);
+            setDoubles.insert(strNameOrig);
+            setHapsDoubles.insert(h);
+        }
     }
-  }
-  for (int h = 0; h < genosDbl.GetNumHaps(); ++h) {
-    string strName = genosDbl.GetGenotypeName(h);
-    if (setDoubles.find(strName) != setDoubles.end()) {
-      //
-      setHapsDoubles.insert(h);
+    for (int h = 0; h < genosDbl.GetNumHaps(); ++h)
+    {
+        string strName = genosDbl.GetGenotypeName(h);
+        if (setDoubles.find(strName) != setDoubles.end())
+        {
+            // pass 2: this row carries the base name of a row recorded in pass 1, so record it too
+            setHapsDoubles.insert(h);
+        }
     }
-  }
 }
 
-void ScistDoubletSearch ::OutputMutTree(
-    ScistGenGenotypeMat &genosNoDoublets) const {
-  // output the matrix
-  ScistGenGenotypeMat *pMatRes = genosNoDoublets.Copy();
-
-  // YW: 05/16/19 try to make tree inference with doublet more accurate
-  // set all doublets haplotypes to be uncertain
-  // analyze doublets
-  set<int> setHapsDoubles;
-  FindDoubletHapsInMat(*pMatRes, setHapsDoubles);
-  // cout << "Set of doublet haplotypes: ";
-  // DumpIntSet(setHapsDoubles);
-
-  // now set uncertain haps to those positions
-  for (set<int>::iterator it = setHapsDoubles.begin();
-       it != setHapsDoubles.end(); ++it) {
-    for (int s = 0; s < pMatRes->GetNumSites(); ++s) {
-      double probOld = pMatRes->GetGenotypeProbAllele0At(*it, s);
-      if (probOld < 0.5) {
-        pMatRes->SetGenotypeProbAt(*it, s, probOld / 2 + 0.25);
-      }
-
-      // pMatRes->SetGenotypeProbAt(*it, s, 0.5);
-      //}
-      // else
-      //{
-      //    pMatRes->SetGenotypeProbAt(*it, s, 0.7);
-      //}
+void ScistDoubletSearch ::OutputMutTree(ScistGenGenotypeMat &genosNoDoublets) const
+{
+    // Works on a copy of genosNoDoublets: adjusts doublet-row probabilities, runs ScistPerfPhyMLE, applies the reported genotype changes, then (for ScistHaplotypeMat only) prints the mutation tree and passes it to OutputMutationTree. The copy is deleted before returning.
+    ScistGenGenotypeMat *pMatRes = genosNoDoublets.Copy();
+
+    // YW: 05/16/19 try to make tree inference with doublet more accurate
+    // set all doublets haplotypes to be uncertain
+    // analyze doublets: collect the row indices that FindDoubletHapsInMat reports for the copy
+    set<int> setHapsDoubles;
+    FindDoubletHapsInMat(*pMatRes, setHapsDoubles);
+    //cout << "Set of doublet haplotypes: ";
+    //DumpIntSet(setHapsDoubles);
+
+    // now set uncertain haps to those positions: on the collected rows, any allele-0 probability below 0.5 is moved halfway towards 0.5 (p -> p / 2 + 0.25); other values are left as they are
+    for (set<int>::iterator it = setHapsDoubles.begin(); it != setHapsDoubles.end(); ++it)
+    {
+        for (int s = 0; s < pMatRes->GetNumSites(); ++s)
+        {
+            double probOld = pMatRes->GetGenotypeProbAllele0At(*it, s);
+            if (probOld < 0.5)
+            {
+                pMatRes->SetGenotypeProbAt(*it, s, probOld / 2 + 0.25);
+            }
+
+            //pMatRes->SetGenotypeProbAt(*it, s, 0.5);
+            //}
+            //else
+            //{
+            //    pMatRes->SetGenotypeProbAt(*it, s, 0.7);
+            //}
+        }
+    }
+    //cout << "After revision, genotype matrix: ";
+    //pMatRes->Dump();
+
+    // run the inference on the adjusted copy with its own output and verbose flags switched off
+    ScistPerfPhyMLE sciInf1(*pMatRes);
+    sciInf1.SetOutput(false);
+    sciInf1.SetVerbose(false);
+    std::set<std::pair<std::pair<int, int>, int>> listChangedPlaces;
+    std::string strTreeNW;
+    //double opt = (the value returned by Infer() is deliberately ignored here)
+    sciInf1.Infer(&listChangedPlaces, &strTreeNW);
+    //cout << "Before mapping: inferred tree is " << strTreeNW << endl;
+
+    pMatRes->ChangeGenosAtPositions(listChangedPlaces); // presumably applies the genotype changes collected by Infer() to the copy - confirm
+    //if( fVerbose )
+    //{
+    //    cout << "Called genotypes\n";
+    //    pMatRes->OutputImput();
+    //}
+    ScistHaplotypeMat *pMatResHap = dynamic_cast<ScistHaplotypeMat *>(pMatRes); // NULL when the copy is not a ScistHaplotypeMat
+    if (pMatResHap == NULL)
+    {
+        cout << "** Right now, only output perfect phylogeny for binary genotypes\n";
+    }
+    else
+    {
+        string strTreeEdgeLabel = ConsRootedPerfectPhylogenyFromMat(pMatResHap->GetHapMat(), true, true);
+        //cout << "** Perfect phylogeny (with sites labeled on edges) from the imputed genotypes: " << strTreeEdgeLabel << endl;
+
+        string strMutTree = ConsEdgeLabeTree(strTreeEdgeLabel);
+        string strMutTreeConv = ConvMutTreeStr(strMutTree);
+        cout << "^^ Mutation tree: " << strMutTreeConv << endl;
+
+        // output mutation tree file, using the path stored in strMutTreeFileName
+        OutputMutationTree(this->strMutTreeFileName.c_str(), strMutTreeConv, true);
     }
-  }
-  // cout << "After revision, genotype matrix: ";
-  // pMatRes->Dump();
-
-  //
-  ScistPerfPhyMLE sciInf1(*pMatRes);
-  sciInf1.SetOutput(false);
-  sciInf1.SetVerbose(false);
-  std::set<std::pair<std::pair<int, int>, int> > listChangedPlaces;
-  std::string strTreeNW;
-  // double opt =
-  sciInf1.Infer(&listChangedPlaces, &strTreeNW);
-  // cout << "Before mapping: inferred tree is " << strTreeNW << endl;
-
-  pMatRes->ChangeGenosAtPositions(listChangedPlaces);
-  // if( fVerbose )
-  //{
-  //    cout << "Called genotypes\n";
-  //    pMatRes->OutputImput();
-  //}
-  ScistHaplotypeMat *pMatResHap = dynamic_cast<ScistHaplotypeMat *>(pMatRes);
-  if (pMatResHap == NULL) {
-    cout
-        << "** Right now, only output perfect phylogeny for binary genotypes\n";
-  } else {
-    string strTreeEdgeLabel =
-        ConsRootedPerfectPhylogenyFromMat(pMatResHap->GetHapMat(), true, true);
-    // cout << "** Perfect phylogeny (with sites labeled on edges) from the
-    // imputed genotypes: " << strTreeEdgeLabel << endl;
-
-    string strMutTree = ConsEdgeLabeTree(strTreeEdgeLabel);
-    string strMutTreeConv = ConvMutTreeStr(strMutTree);
-    cout << "^^ Mutation tree: " << strMutTreeConv << endl;
-
-    // output mutation tree file
-    OutputMutationTree(this->strMutTreeFileName.c_str(), strMutTreeConv, true);
-  }
-
-  delete pMatRes;
+
+    delete pMatRes;
 }
 
-ScistGenGenotypeMat *ScistDoubletSearch ::CreateGnoesWithDouble(
-    const ScistGenGenotypeMat &genosOrig, int indexDouble,
-    const ScistGenGenotypeMat &genosDoubleInfer) const {
-  // cout << "CreateGnoesWithDouble: genosOrig: ";
-  // genosOrig.Dump();
-  // cout << "indexDouble: " << indexDouble << endl;
-  // cout << "genosDoubleInfer: ";
-  // genosDoubleInfer.Dump();
-
-  // create a new genotype matrix w/ doublets
-  ScistGenGenotypeMat *pResMat = genosOrig.CreateNewMat();
-  pResMat->SetSize(genosOrig.GetNumHaps() + 1, genosOrig.GetNumSites());
-
-  // fill in old values
-  for (int i = 0; i < genosOrig.GetNumHaps(); ++i) {
-    pResMat->SetGenotypeName(i, genosOrig.GetGenotypeName(i));
-    for (int j = 0; j < genosOrig.GetNumSites(); ++j) {
-      pResMat->SetGenotypeAt(i, j, genosOrig.GetGenotypeAt(i, j));
-      pResMat->SetGenotypeProbAt(i, j,
-                                 genosOrig.GetGenotypeProbAllele0At(i, j));
+ScistGenGenotypeMat *ScistDoubletSearch ::CreateGnoesWithDouble(const ScistGenGenotypeMat &genosOrig, int indexDouble, const ScistGenGenotypeMat &genosDoubleInfer) const
+{
+    //cout << "CreateGnoesWithDouble: genosOrig: ";
+    //genosOrig.Dump();
+    //cout << "indexDouble: " << indexDouble << endl;
+    //cout << "genosDoubleInfer: ";
+    //genosDoubleInfer.Dump();
+
+    // create a new genotype matrix w/ doublets
+    ScistGenGenotypeMat *pResMat = genosOrig.CreateNewMat();
+    pResMat->SetSize(genosOrig.GetNumHaps() + 1, genosOrig.GetNumSites());
+
+    // fill in old values
+    for (int i = 0; i < genosOrig.GetNumHaps(); ++i)
+    {
+        pResMat->SetGenotypeName(i, genosOrig.GetGenotypeName(i));
+        for (int j = 0; j < genosOrig.GetNumSites(); ++j)
+        {
+            pResMat->SetGenotypeAt(i, j, genosOrig.GetGenotypeAt(i, j));
+            pResMat->SetGenotypeProbAt(i, j, genosOrig.GetGenotypeProbAllele0At(i, j));
+        }
     }
-  }
 #if 0
     // fill in imputed values values
     for(int i=0; i<genosDoubleInfer.GetNumHaps()-1; ++i)
@@ -891,406 +919,410 @@ ScistGenGenotypeMat *ScistDoubletSearch ::CreateGnoesWithDouble(
     }
 #endif
 
-  // fill in imputed dobulet genos (two last rows)
-  pResMat->SetGenotypeName(genosOrig.GetNumHaps(),
-                           GetNewGenoDoubleRowName(genosOrig, indexDouble));
-  for (int s = 0; s < genosOrig.GetNumSites(); ++s) {
-    double p0 = genosOrig.GetGenotypeProbAllele0At(indexDouble, s);
-    int g1 =
-        genosDoubleInfer.GetGenotypeAt(genosDoubleInfer.GetNumHaps() - 2, s);
-    pResMat->SetGenotypeAt(indexDouble, s, g1);
-    double p0Use1 = p0;
-    if ((g1 == 0 && p0 < 0.5) || (g1 == 1 && p0 > 0.5)) {
-      p0Use1 = 1.0 - p0;
-    }
-    pResMat->SetGenotypeProbAt(indexDouble, s, p0Use1);
-    int g2 =
-        genosDoubleInfer.GetGenotypeAt(genosDoubleInfer.GetNumHaps() - 1, s);
-    pResMat->SetGenotypeAt(genosOrig.GetNumHaps(), s, g2);
-    double p0Use2 = p0;
-    if ((g2 == 0 && p0 < 0.5) || (g2 == 1 && p0 > 0.5)) {
-      p0Use2 = 1.0 - p0;
+    // fill in imputed dobulet genos (two last rows)
+    pResMat->SetGenotypeName(genosOrig.GetNumHaps(), GetNewGenoDoubleRowName(genosOrig, indexDouble));
+    for (int s = 0; s < genosOrig.GetNumSites(); ++s)
+    {
+        double p0 = genosOrig.GetGenotypeProbAllele0At(indexDouble, s);
+        int g1 = genosDoubleInfer.GetGenotypeAt(genosDoubleInfer.GetNumHaps() - 2, s);
+        pResMat->SetGenotypeAt(indexDouble, s, g1);
+        double p0Use1 = p0;
+        if ((g1 == 0 && p0 < 0.5) || (g1 == 1 && p0 > 0.5))
+        {
+            p0Use1 = 1.0 - p0;
+        }
+        pResMat->SetGenotypeProbAt(indexDouble, s, p0Use1);
+        int g2 = genosDoubleInfer.GetGenotypeAt(genosDoubleInfer.GetNumHaps() - 1, s);
+        pResMat->SetGenotypeAt(genosOrig.GetNumHaps(), s, g2);
+        double p0Use2 = p0;
+        if ((g2 == 0 && p0 < 0.5) || (g2 == 1 && p0 > 0.5))
+        {
+            p0Use2 = 1.0 - p0;
+        }
+        pResMat->SetGenotypeProbAt(genosOrig.GetNumHaps(), s, p0Use2);
     }
-    pResMat->SetGenotypeProbAt(genosOrig.GetNumHaps(), s, p0Use2);
-  }
 
-  return pResMat;
+    return pResMat;
 }
 
 // construct matrix that is constructed from doublet result
-void ScistDoubletSearch ::FindOrigImputedGeno(
-    const ScistGenGenotypeMat &genosDoubletRes,
-    ScistGenGenotypeMat &genosImpute,
-    std::map<int, std::set<int> > &mapDoublets) const {
-  // cout << "FindOrigImputedGeno: genosDoubletRes: ";
-  // genosDoubletRes.Dump();
-  mapDoublets.clear();
-  // match any row
-  map<string, int> mapNameToRowIndexDouble;
-  for (int i = 0; i < genosImpute.GetNumHaps(); ++i) {
-    //
-    mapNameToRowIndexDouble[genosImpute.GetGenotypeName(i)] = i;
-  }
-
-  // first copy any row that is not double
-  set<int> rowsDouble;
-  for (int i = 0; i < genosDoubletRes.GetNumHaps(); ++i) {
-    if (mapNameToRowIndexDouble.find(genosDoubletRes.GetGenotypeName(i)) !=
-        mapNameToRowIndexDouble.end()) {
-      // copy
-      int index = mapNameToRowIndexDouble[genosDoubletRes.GetGenotypeName(i)];
-      for (int j = 0; j < genosDoubletRes.GetNumSites(); ++j) {
-        genosImpute.SetGenotypeAt(index, j,
-                                  genosDoubletRes.GetGenotypeAt(i, j));
-      }
-    } else {
-      rowsDouble.insert(i);
+void ScistDoubletSearch ::FindOrigImputedGeno(const ScistGenGenotypeMat &genosDoubletRes, ScistGenGenotypeMat &genosImpute, std::map<int, std::set<int>> &mapDoublets) const
+{
+    //cout << "FindOrigImputedGeno: genosDoubletRes: ";
+    //genosDoubletRes.Dump();
+    mapDoublets.clear();
+    // match any row: index the rows of genosImpute by genotype name so rows of genosDoubletRes can be matched to them
+    map<string, int> mapNameToRowIndexDouble;
+    for (int i = 0; i < genosImpute.GetNumHaps(); ++i)
+    {
+        // name -> row index in genosImpute (if two rows share a name, the later index wins)
+        mapNameToRowIndexDouble[genosImpute.GetGenotypeName(i)] = i;
     }
-  }
-  // cout << "RowsDouble: ";
-  // DumpIntSet(rowsDouble);
-  // now add those
-  for (set<int>::iterator it = rowsDouble.begin(); it != rowsDouble.end();
-       ++it) {
-    int i = *it;
-    string strName = GetGenoDoubleRowName(genosDoubletRes.GetGenotypeName(i));
-    YW_ASSERT_INFO(mapNameToRowIndexDouble.find(strName) !=
-                       mapNameToRowIndexDouble.end(),
-                   "Fail to find the row");
-
-    // copy
-    int index = mapNameToRowIndexDouble[strName];
-    for (int j = 0; j < genosDoubletRes.GetNumSites(); ++j) {
-      genosImpute.AddGenotypeAt(index, j, genosDoubletRes.GetGenotypeAt(i, j));
+
+    // first copy any row that is not double: rows whose name exists in genosImpute overwrite that row; all other rows are collected in rowsDouble
+    set<int> rowsDouble;
+    for (int i = 0; i < genosDoubletRes.GetNumHaps(); ++i)
+    {
+        if (mapNameToRowIndexDouble.find(genosDoubletRes.GetGenotypeName(i)) != mapNameToRowIndexDouble.end())
+        {
+            // copy this row's genotypes into the same-named row of genosImpute
+            int index = mapNameToRowIndexDouble[genosDoubletRes.GetGenotypeName(i)];
+            for (int j = 0; j < genosDoubletRes.GetNumSites(); ++j)
+            {
+                genosImpute.SetGenotypeAt(index, j, genosDoubletRes.GetGenotypeAt(i, j));
+            }
+        }
+        else
+        {
+            rowsDouble.insert(i);
+        }
     }
+    //cout << "RowsDouble: ";
+    //DumpIntSet(rowsDouble);
+    // now add those: each unmatched row is merged (via AddGenotypeAt) into the genosImpute row named by its base name
+    for (set<int>::iterator it = rowsDouble.begin(); it != rowsDouble.end(); ++it)
+    {
+        int i = *it;
+        string strName = GetGenoDoubleRowName(genosDoubletRes.GetGenotypeName(i));
+        YW_ASSERT_INFO(mapNameToRowIndexDouble.find(strName) != mapNameToRowIndexDouble.end(), "Fail to find the row");
 
-    // record it
-    int strNameInt = std::stoi(strName);
-    mapDoublets[strNameInt].insert(i);
-    mapDoublets[strNameInt].insert(genosDoubletRes.FindCellByName(strName));
-  }
+        // copy: AddGenotypeAt presumably combines the value with what the row already holds rather than overwriting - confirm
+        int index = mapNameToRowIndexDouble[strName];
+        for (int j = 0; j < genosDoubletRes.GetNumSites(); ++j)
+        {
+            genosImpute.AddGenotypeAt(index, j, genosDoubletRes.GetGenotypeAt(i, j));
+        }
+
+        // record it: key is the base name parsed as an integer; values are this row and the row FindCellByName returns for the base name. NOTE(review): std::stoi throws std::invalid_argument if the name is not numeric - confirm names are always integers.
+        int strNameInt = std::stoi(strName);
+        mapDoublets[strNameInt].insert(i);
+        mapDoublets[strNameInt].insert(genosDoubletRes.FindCellByName(strName));
+    }
 }
 
-string ScistDoubletSearch ::GetGenoDoubleRowName(const string &strName) const {
-  // if last character is '
-  if (strName.length() > 0 && strName.substr(strName.length() - 1, 1) == "'") {
-    // return the portion that doesn't have trailing '
-    int posNone = strName.find_last_not_of("'");
-    return strName.substr(0, posNone + 1);
-  }
-  YW_ASSERT_INFO(false, "The row is doublet");
-  string strDummy;
-  return strDummy;
+string ScistDoubletSearch ::GetGenoDoubleRowName(const string &strName) const
+{
+    // Returns strName with all trailing ' characters removed; only names whose last character is ' are accepted
+    if (strName.length() > 0 && strName.substr(strName.length() - 1, 1) == "'")
+    {
+        // return the portion that doesn't have trailing ' (if the name consists only of ' characters, find_last_not_of yields npos; presumably that becomes -1 as an int, giving an empty result - confirm)
+        int posNone = strName.find_last_not_of("'");
+        return strName.substr(0, posNone + 1);
+    }
+    YW_ASSERT_INFO(false, "The row is doublet"); // reached only when strName does not end in "'"; NOTE(review): the message wording looks inverted - confirm
+    string strDummy;
+    return strDummy;
 }
 
-ScistGenGenotypeMat *
-ScistDoubletSearch ::EvalGenoDoubletSet(const ScistGenGenotypeMat &matToSearch,
-                                        const set<int> &setDoubletRows,
-                                        double &resOpt) {
-  //
-  resOpt = setDoubletRows.size() * this->costDoublet;
-  set<int> setDoubleRowsConv;
-  double costInit = 0.0;
-  ScistGenGenotypeMat *pMatDouble = InitSearchGenotypes(
-      matToSearch, setDoubletRows, setDoubleRowsConv, costInit);
-  resOpt += costInit;
-  // cout << "costInit: " << costInit << ", matrixDouble: ";
-  // pMatDouble->Dump();
-
-  if (setDoubletRows.size() == 0) {
-    return pMatDouble;
-  }
-
-  // now score doublet
-  set<int> rowsTemplate;
-  PopulateSetWithInterval(rowsTemplate, 0, pMatDouble->GetNumHaps() - 1);
-  SubtractSets(rowsTemplate, setDoubleRowsConv);
-
-  // each time pick the lowest cost change to resolve doublets
-  while (setDoubleRowsConv.size() > 0) {
-    // cout << "setDoubleRowsConv: ";
-    // DumpIntSet(setDoubleRowsConv);
-    // cout << "rowsTemplate: ";
-    // DumpIntSet(rowsTemplate);
-    // evaluate each
-    set<int> rowsDone;
-
-    double optBest = HAP_MAX_INT * 1.0;
-    vector<int> vecHap1, vecHap2;
-    int rowBest = -1;
-    for (set<int>::iterator it = setDoubleRowsConv.begin();
-         it != setDoubleRowsConv.end(); ++it) {
-      if (rowsDone.find(*it) != rowsDone.end()) {
-        continue;
-      }
-
-      vector<int> vecHap1Step, vecHap2Step;
-      double optStep = ScoreDoubletRow(pMatDouble, rowsTemplate, *it,
-                                       vecHap1Step, vecHap2Step);
-
-      // cout << "ScoreDoubleRow for row " << *it << ", two resolved haplotypes:
-      // "; DumpIntVec(vecHap1Step); DumpIntVec(vecHap2Step);
-
-      // if there is no change of doublets, stop
-      if (IsAllZeroVec(vecHap1Step) || IsAllZeroVec(vecHap2Step) ||
-          vecHap1Step == vecHap2Step) {
-        // this is trivial doublet, stop
-        break;
-      }
-
-      if (optStep < optBest) {
-        optBest = optStep;
-        vecHap1 = vecHap1Step;
-        vecHap2 = vecHap2Step;
-        rowBest = *it;
-        // cout << "better....\n";
-      }
-
-      rowsDone.insert(*it);
-      rowsDone.insert(*it + 1);
+ScistGenGenotypeMat *ScistDoubletSearch ::EvalGenoDoubletSet(const ScistGenGenotypeMat &matToSearch, const set<int> &setDoubletRows, double &resOpt)
+{
+    // Greedily resolve the given doublet rows, one pair per round; resOpt receives the accumulated cost. Returns the working matrix, or NULL when a round selects no row to resolve.
+    resOpt = setDoubletRows.size() * this->costDoublet;
+    set<int> setDoubleRowsConv;
+    double costInit = 0.0;
+    ScistGenGenotypeMat *pMatDouble = InitSearchGenotypes(matToSearch, setDoubletRows, setDoubleRowsConv, costInit);
+    resOpt += costInit;
+    //cout << "costInit: " << costInit << ", matrixDouble: ";
+    //pMatDouble->Dump();
+
+    if (setDoubletRows.size() == 0)
+    {
+        return pMatDouble;
     }
 
-    if (rowBest < 0) {
-      delete pMatDouble;
-      pMatDouble = NULL;
-      break;
+    // now score doublets; the template starts as every row of the working matrix minus the doublet rows
+    set<int> rowsTemplate;
+    PopulateSetWithInterval(rowsTemplate, 0, pMatDouble->GetNumHaps() - 1);
+    SubtractSets(rowsTemplate, setDoubleRowsConv);
+
+    // each time pick the lowest cost change to resolve doublets
+    while (setDoubleRowsConv.size() > 0)
+    {
+        //cout << "setDoubleRowsConv: ";
+        //DumpIntSet(setDoubleRowsConv);
+        //cout << "rowsTemplate: ";
+        //DumpIntSet(rowsTemplate);
+        // evaluate each unresolved doublet; rows come in pairs (r, r + 1), so the partner row is skipped via rowsDone
+        set<int> rowsDone;
+
+        double optBest = HAP_MAX_INT * 1.0;
+        vector<int> vecHap1, vecHap2;
+        int rowBest = -1;
+        for (set<int>::iterator it = setDoubleRowsConv.begin(); it != setDoubleRowsConv.end(); ++it)
+        {
+            if (rowsDone.find(*it) != rowsDone.end())
+            {
+                continue;
+            }
+
+            vector<int> vecHap1Step, vecHap2Step;
+            double optStep = ScoreDoubletRow(pMatDouble, rowsTemplate, *it, vecHap1Step, vecHap2Step);
+
+            //cout << "ScoreDoubleRow for row " << *it << ", two resolved haplotypes: ";
+            //DumpIntVec(vecHap1Step);
+            //DumpIntVec(vecHap2Step);
+
+            // if there is no change of doublets, stop
+            if (IsAllZeroVec(vecHap1Step) || IsAllZeroVec(vecHap2Step) || vecHap1Step == vecHap2Step)
+            {
+                // this is trivial doublet, stop scanning the remaining rows of this round
+                break;
+            }
+
+            if (optStep < optBest)
+            {
+                optBest = optStep;
+                vecHap1 = vecHap1Step;
+                vecHap2 = vecHap2Step;
+                rowBest = *it;
+                //cout << "better....\n";
+            }
+
+            rowsDone.insert(*it);
+            rowsDone.insert(*it + 1);
+        }
+
+        if (rowBest < 0) // no row was selected in this round: free the matrix and return NULL
+        {
+            delete pMatDouble;
+            pMatDouble = NULL;
+            break;
+        }
+
+        // take the best one: apply its phasing, then move the pair from the doublet set to the template set
+        YW_ASSERT_INFO(rowBest >= 0, "Wrong");
+        resOpt += optBest;
+        //cout << "**Resolve double: optBest: " << optBest << ", rowBest: " << rowBest << endl;
+        //cout << "vecHap1: ";
+        //DumpIntVec(vecHap1);
+        //cout << "vecHap2: ";
+        //DumpIntVec(vecHap2);
+        UpdateSearchGenotypes(pMatDouble, rowBest, vecHap1, vecHap2);
+
+        //cout << "Evl step matrix: ";
+        //pMatDouble->Dump();
+
+        //ScistHaplotypeMat *pMatResHap0 = dynamic_cast<ScistHaplotypeMat *>(pMatDouble);
+        //string strTreeEdgeLabel0 = ConsRootedPerfectPhylogenyFromMat(pMatResHap0->GetHapMat(), true, true);
+        //cout << "EvalGenoDoubletSet tree (step): " << strTreeEdgeLabel0 << endl;
+
+        setDoubleRowsConv.erase(rowBest);
+        setDoubleRowsConv.erase(rowBest + 1);
+        rowsTemplate.insert(rowBest);
+        rowsTemplate.insert(rowBest + 1);
     }
 
-    // take the best one
-    YW_ASSERT_INFO(rowBest >= 0, "Wrong");
-    resOpt += optBest;
-    // cout << "**Resolve double: optBest: " << optBest << ", rowBest: " <<
-    // rowBest << endl; cout << "vecHap1: "; DumpIntVec(vecHap1); cout <<
-    // "vecHap2: "; DumpIntVec(vecHap2);
-    UpdateSearchGenotypes(pMatDouble, rowBest, vecHap1, vecHap2);
-
-    // cout << "Evl step matrix: ";
-    // pMatDouble->Dump();
-
-    // ScistHaplotypeMat *pMatResHap0 = dynamic_cast<ScistHaplotypeMat
-    // *>(pMatDouble); string strTreeEdgeLabel0 =
-    // ConsRootedPerfectPhylogenyFromMat(pMatResHap0->GetHapMat(), true, true);
-    // cout << "EvalGenoDoubletSet tree (step): " << strTreeEdgeLabel0 << endl;
-
-    setDoubleRowsConv.erase(rowBest);
-    setDoubleRowsConv.erase(rowBest + 1);
-    rowsTemplate.insert(rowBest);
-    rowsTemplate.insert(rowBest + 1);
-  }
-
-  return pMatDouble;
+    return pMatDouble;
 }
 
-void ScistDoubletSearch ::FindDoubletCandidates(set<int> &candidatesDoublet) {
-  // for now, each row can be a doublet
-  candidatesDoublet.clear();
-  PopulateSetWithInterval(candidatesDoublet, 0,
-                          this->genosInput.GetNumHaps() - 1);
+void ScistDoubletSearch ::FindDoubletCandidates(set<int> &candidatesDoublet)
+{
+    // for now, each row can be a doublet: the set is cleared and repopulated with the interval 0 .. GetNumHaps() - 1 of the input matrix
+    candidatesDoublet.clear();
+    PopulateSetWithInterval(candidatesDoublet, 0, this->genosInput.GetNumHaps() - 1);
 }
 
-ScistGenGenotypeMat *
-ScistDoubletSearch ::InitSearchGenotypes(const ScistGenGenotypeMat &matToSearch,
-                                         const set<int> &candidatesDoubletCurr,
-                                         set<int> &setDoubletRows,
-                                         double &costInit) {
-  // cout << "candidatesDoubletCurr: ";
-  // DumpIntSet(candidatesDoubletCurr);
-  // cout << "matToSearch: ";
-  // matToSearch.Dump();
-  // in the new matrix to work with, put the single genotype together, and then
-  // put the doublets later
-  ScistGenGenotypeMat *pMatToProc = new ScistHaplotypeMat();
-  int numHapsNew = matToSearch.GetNumHaps() + (int)candidatesDoubletCurr.size();
-  pMatToProc->SetSize(numHapsNew, matToSearch.GetNumSites());
-
-  // fill single rows
-  set<int> setTemplateRows;
-  int hapCur = 0;
-  for (int i = 0; i < matToSearch.GetNumHaps(); ++i) {
-    if (candidatesDoubletCurr.find(i) != candidatesDoubletCurr.end()) {
-      continue;
-    }
-    // copy it
-    for (int s = 0; s < matToSearch.GetNumSites(); ++s) {
-      pMatToProc->SetGenotypeAt(hapCur, s, matToSearch.GetGenotypeAt(i, s));
-      pMatToProc->SetGenotypeProbAt(hapCur, s,
-                                    matToSearch.GetGenotypeProbAllele0At(i, s));
-    }
-    // set name
-    pMatToProc->SetGenotypeName(hapCur, matToSearch.GetGenotypeName(i));
-    setTemplateRows.insert(hapCur);
-
-    ++hapCur;
-  }
-  // cout << "After filling single rows: pMatToProc: ";
-  // pMatToProc->Dump();
-  // now  copy the doublet rows
-  for (int i = 0; i < matToSearch.GetNumHaps(); ++i) {
-    if (candidatesDoubletCurr.find(i) == candidatesDoubletCurr.end()) {
-      continue;
-    }
-    // copy it
-    for (int s = 0; s < matToSearch.GetNumSites(); ++s) {
-      pMatToProc->SetGenotypeAt(hapCur, s, matToSearch.GetGenotypeAt(i, s));
-      pMatToProc->SetGenotypeProbAt(hapCur, s,
-                                    matToSearch.GetGenotypeProbAllele0At(i, s));
-      pMatToProc->SetGenotypeAt(hapCur + 1, s, matToSearch.GetGenotypeAt(i, s));
-      pMatToProc->SetGenotypeProbAt(hapCur + 1, s,
-                                    matToSearch.GetGenotypeProbAllele0At(i, s));
+ScistGenGenotypeMat *ScistDoubletSearch ::InitSearchGenotypes(const ScistGenGenotypeMat &matToSearch, const set<int> &candidatesDoubletCurr, set<int> &setDoubletRows, double &costInit)
+{
+    //cout << "candidatesDoubletCurr: ";
+    //DumpIntSet(candidatesDoubletCurr);
+    //cout << "matToSearch: ";
+    //matToSearch.Dump();
+    // in the new (heap-allocated, returned) matrix, put the single (non-doublet) genotypes first, then two copies of each doublet row
+    ScistGenGenotypeMat *pMatToProc = new ScistHaplotypeMat();
+    int numHapsNew = matToSearch.GetNumHaps() + (int)candidatesDoubletCurr.size();
+    pMatToProc->SetSize(numHapsNew, matToSearch.GetNumSites());
+
+    // fill single rows; their new indices are collected in setTemplateRows
+    set<int> setTemplateRows;
+    int hapCur = 0;
+    for (int i = 0; i < matToSearch.GetNumHaps(); ++i)
+    {
+        if (candidatesDoubletCurr.find(i) != candidatesDoubletCurr.end())
+        {
+            continue;
+        }
+        // copy it
+        for (int s = 0; s < matToSearch.GetNumSites(); ++s)
+        {
+            pMatToProc->SetGenotypeAt(hapCur, s, matToSearch.GetGenotypeAt(i, s));
+            pMatToProc->SetGenotypeProbAt(hapCur, s, matToSearch.GetGenotypeProbAllele0At(i, s));
+        }
+        // set name
+        pMatToProc->SetGenotypeName(hapCur, matToSearch.GetGenotypeName(i));
+        setTemplateRows.insert(hapCur);
+
+        ++hapCur;
     }
-    // set name
-    pMatToProc->SetGenotypeName(hapCur, matToSearch.GetGenotypeName(i));
-    string strName1 = GetNewGenoDoubleRowName(matToSearch, i);
-    pMatToProc->SetGenotypeName(hapCur + 1, strName1);
+    //cout << "After filling single rows: pMatToProc: ";
+    //pMatToProc->Dump();
+    // now copy the doublet rows; each one is duplicated into rows hapCur and hapCur + 1
+    for (int i = 0; i < matToSearch.GetNumHaps(); ++i)
+    {
+        if (candidatesDoubletCurr.find(i) == candidatesDoubletCurr.end())
+        {
+            continue;
+        }
+        // copy it (same genotype and probability into both rows of the pair)
+        for (int s = 0; s < matToSearch.GetNumSites(); ++s)
+        {
+            pMatToProc->SetGenotypeAt(hapCur, s, matToSearch.GetGenotypeAt(i, s));
+            pMatToProc->SetGenotypeProbAt(hapCur, s, matToSearch.GetGenotypeProbAllele0At(i, s));
+            pMatToProc->SetGenotypeAt(hapCur + 1, s, matToSearch.GetGenotypeAt(i, s));
+            pMatToProc->SetGenotypeProbAt(hapCur + 1, s, matToSearch.GetGenotypeProbAllele0At(i, s));
+        }
+        // set name; the second copy gets a fresh name from GetNewGenoDoubleRowName
+        pMatToProc->SetGenotypeName(hapCur, matToSearch.GetGenotypeName(i));
+        string strName1 = GetNewGenoDoubleRowName(matToSearch, i);
+        pMatToProc->SetGenotypeName(hapCur + 1, strName1);
 
-    setDoubletRows.insert(hapCur);
-    setDoubletRows.insert(hapCur + 1);
+        setDoubletRows.insert(hapCur);
+        setDoubletRows.insert(hapCur + 1);
 
-    hapCur += 2;
-  }
-  // cout << "After filling double rows: ";
-  // pMatToProc->Dump();
+        hapCur += 2;
+    }
+    //cout << "After filling double rows: ";
+    //pMatToProc->Dump();
 
-  // now fit perfect phylogeny
-  costInit = FitPerfPhyFor(pMatToProc, setTemplateRows);
+    // now fit perfect phylogeny on the single (template) rows only
+    costInit = FitPerfPhyFor(pMatToProc, setTemplateRows);
 
-  // cout << "Inflated genotype matrix: ";
-  // pMatToProc->Dump();
+    //cout << "Inflated genotype matrix: ";
+    //pMatToProc->Dump();
 
-  return pMatToProc;
+    return pMatToProc;
 }
 
-std::string ScistDoubletSearch ::GetNewGenoDoubleRowName(
-    const ScistGenGenotypeMat &matToSearch, int index) const {
-  // find a new name for the doublet s.t. it is new
-  string strName1 = matToSearch.GetGenotypeName(index) + "'";
-  while (matToSearch.FindCellByName(strName1) >= 0) {
-    strName1 = strName1 + "'";
-  }
-  return strName1;
+std::string ScistDoubletSearch ::GetNewGenoDoubleRowName(const ScistGenGenotypeMat &matToSearch, int index) const
+{
+    // find a new name for the doublet: append "'" to the row's name until FindCellByName no longer finds it in matToSearch
+    string strName1 = matToSearch.GetGenotypeName(index) + "'";
+    while (matToSearch.FindCellByName(strName1) >= 0)
+    {
+        strName1 = strName1 + "'";
+    }
+    return strName1;
 }
 
-void ScistDoubletSearch ::UpdateSearchGenotypes(
-    ScistGenGenotypeMat *pMatCurr, int genoDoublet,
-    const vector<int> &genoDoublePhase1, const vector<int> &genoDoublePhase2) {
-  // fill in the new values the two rows genoDouble and the next row
-  YW_ASSERT_INFO(pMatCurr->GetNumSites() == (int)genoDoublePhase1.size(),
-                 "Wrong size");
-  for (int s = 0; s < pMatCurr->GetNumSites(); ++s) {
-    pMatCurr->SetGenotypeAt(genoDoublet, s, genoDoublePhase1[s]);
-    pMatCurr->SetGenotypeAt(genoDoublet + 1, s, genoDoublePhase2[s]);
-  }
+void ScistDoubletSearch ::UpdateSearchGenotypes(ScistGenGenotypeMat *pMatCurr, int genoDoublet, const vector<int> &genoDoublePhase1, const vector<int> &genoDoublePhase2)
+{
+    // write the two phased genotypes into row genoDoublet and the row right after it
+    YW_ASSERT_INFO(pMatCurr->GetNumSites() == (int)genoDoublePhase1.size(), "Wrong size"); // NOTE(review): only genoDoublePhase1 is size-checked; genoDoublePhase2 is indexed over the same range
+    for (int s = 0; s < pMatCurr->GetNumSites(); ++s)
+    {
+        pMatCurr->SetGenotypeAt(genoDoublet, s, genoDoublePhase1[s]);
+        pMatCurr->SetGenotypeAt(genoDoublet + 1, s, genoDoublePhase2[s]);
+    }
 }
 
-double ScistDoubletSearch ::ScoreDoubletRow(ScistGenGenotypeMat *pMatCurr,
-                                            const set<int> &rowsTemplate,
-                                            int rowDouble,
-                                            vector<int> &genoDoublePhase1,
-                                            vector<int> &genoDoublePhase2) {
-  // cout << "ScistDoubletSearch :: ScoreDoubletRow: curr mat: ";
-  // pMatCurr->Dump();
-  //
-  ScistDoublet sciDouble(*pMatCurr);
-  return sciDouble.EvalGenoDoublet(rowsTemplate, rowDouble, genoDoublePhase1,
-                                   genoDoublePhase2);
+double ScistDoubletSearch ::ScoreDoubletRow(ScistGenGenotypeMat *pMatCurr, const set<int> &rowsTemplate, int rowDouble, vector<int> &genoDoublePhase1, vector<int> &genoDoublePhase2)
+{
+    //cout << "ScistDoubletSearch :: ScoreDoubletRow: curr mat: ";
+    //pMatCurr->Dump();
+    // delegate to a temporary ScistDoublet built on the current matrix; the two phase vectors are output parameters
+    ScistDoublet sciDouble(*pMatCurr);
+    return sciDouble.EvalGenoDoublet(rowsTemplate, rowDouble, genoDoublePhase1, genoDoublePhase2);
 }
 
-double
-ScistDoubletSearch ::FitPerfPhyFor(ScistGenGenotypeMat *pMatCurr,
-                                   const std::set<int> &setTemplateRows) {
-  // cout << "template rows: ";
-  // DumpIntSet(setTemplateRows);
-  // cout << "Current matrix: ";
-  // pMatCurr->Dump();
-  // Make the chosen rows to be perfect phylogeny
-  set<int> sitesUse;
-  PopulateSetWithInterval(sitesUse, 0, this->genosInput.GetNumSites() - 1);
-
-  // create a submatrix to fit perfect phylogeny
-  ScistGenGenotypeMat *pMatSub = pMatCurr->SubMatrix(setTemplateRows, sitesUse);
-  // cout << "Submatrix: ";
-  // pMatSub->Dump();
-  ScistPerfPhyMLE sciInf1(*pMatSub);
-  sciInf1.SetOutput(false);
-  sciInf1.SetVerbose(false);
-  double opt = -1.0 * sciInf1.Infer();
-
-  // update genotype
-  // cout << "After perfect phylogeny fitting: genotypes are: opt = " << opt <<
-  // ": "; pMatSub->Dump();
-  int rowCur = 0;
-  for (set<int>::const_iterator it = setTemplateRows.begin();
-       it != setTemplateRows.end(); ++it) {
-    for (int s = 0; s < pMatCurr->GetNumSites(); ++s) {
-      pMatCurr->SetGenotypeAt(*it, s, pMatSub->GetGenotypeAt(rowCur, s));
+double ScistDoubletSearch ::FitPerfPhyFor(ScistGenGenotypeMat *pMatCurr, const std::set<int> &setTemplateRows)
+{
+    //cout << "template rows: ";
+    //DumpIntSet(setTemplateRows);
+    //cout << "Current matrix: ";
+    //pMatCurr->Dump();
+    // Make the chosen rows a perfect phylogeny (pMatCurr is updated in place); returns the negated result of ScistPerfPhyMLE::Infer()
+    set<int> sitesUse;
+    PopulateSetWithInterval(sitesUse, 0, this->genosInput.GetNumSites() - 1); // NOTE(review): site count is taken from genosInput, not pMatCurr
+
+    // create a submatrix to fit perfect phylogeny
+    ScistGenGenotypeMat *pMatSub = pMatCurr->SubMatrix(setTemplateRows, sitesUse);
+    //cout << "Submatrix: ";
+    //pMatSub->Dump();
+    ScistPerfPhyMLE sciInf1(*pMatSub);
+    sciInf1.SetOutput(false);
+    sciInf1.SetVerbose(false);
+    double opt = -1.0 * sciInf1.Infer();
+
+    // update genotype: copy the fitted rows back; assumes pMatSub rows follow the ascending order of setTemplateRows -- TODO confirm against SubMatrix
+    //cout << "After perfect phylogeny fitting: genotypes are: opt = " << opt << ": ";
+    //pMatSub->Dump();
+    int rowCur = 0;
+    for (set<int>::const_iterator it = setTemplateRows.begin(); it != setTemplateRows.end(); ++it)
+    {
+        for (int s = 0; s < pMatCurr->GetNumSites(); ++s)
+        {
+            pMatCurr->SetGenotypeAt(*it, s, pMatSub->GetGenotypeAt(rowCur, s));
+        }
+        ++rowCur;
     }
-    ++rowCur;
-  }
-  // cout << "After perfect phylogeny fitting, current matrix: ";
-  // pMatCurr->Dump();
+    //cout << "After perfect phylogeny fitting, current matrix: ";
+    //pMatCurr->Dump();
 
-  delete pMatSub;
-  return opt;
+    delete pMatSub;
+    return opt;
 }
 
-std::string
-ScistDoubletSearch ::ConvMutTreeStr(const std::string &strTree) const {
-  //
-  if (this->listSiteNames.size() == 0) {
-    // no conversion if no cell names specified
-    return strTree;
-  }
-
-  TaxaMapper taxaMapper;
-  for (int i = 0; i < (int)listSiteNames.size(); ++i) {
-    taxaMapper.AddTaxaStringWithId(i + 1, listSiteNames[i]);
-  }
-  //
-  return taxaMapper.ConvIdStringWithOrigTaxa(strTree);
+std::string ScistDoubletSearch ::ConvMutTreeStr(const std::string &strTree) const
+{
+    // convert strTree through a TaxaMapper loaded with listSiteNames (presumably replacing ids with site names -- TODO confirm against TaxaMapper)
+    if (this->listSiteNames.size() == 0)
+    {
+        // no conversion if no site names specified
+        return strTree;
+    }
+
+    TaxaMapper taxaMapper;
+    for (int i = 0; i < (int)listSiteNames.size(); ++i)
+    {
+        taxaMapper.AddTaxaStringWithId(i + 1, listSiteNames[i]);
+    }
+    // site names were registered above under 1-based ids (i + 1)
+    return taxaMapper.ConvIdStringWithOrigTaxa(strTree);
 }
 
 // *************************************************************************************
 
-void ScistDoubletTest() {
-  ScistHaplotypeMat genoMat;
-  const int numSCs = 5, numSites = 3;
-  genoMat.SetSize(numSCs, numSites);
-  genoMat.SetGenotypeAt(0, 0, 0);
-  genoMat.SetGenotypeAt(0, 1, 0);
-  genoMat.SetGenotypeAt(0, 2, 1);
-  genoMat.SetGenotypeAt(1, 0, 0);
-  genoMat.SetGenotypeAt(1, 1, 1);
-  genoMat.SetGenotypeAt(1, 2, 0);
-  genoMat.SetGenotypeAt(2, 0, 1);
-  genoMat.SetGenotypeAt(2, 1, 1);
-  genoMat.SetGenotypeAt(2, 2, 0);
-  genoMat.SetGenotypeAt(3, 0, 1);
-  genoMat.SetGenotypeAt(3, 1, 1);
-  genoMat.SetGenotypeAt(3, 2, 0);
-  genoMat.SetGenotypeAt(4, 0, 1);
-  genoMat.SetGenotypeAt(4, 1, 0);
-  genoMat.SetGenotypeAt(4, 2, 1);
-
-  // genoMat.SetSize(numSCs, numSites);
-  genoMat.SetGenotypeProbAt(0, 0, 0.8);
-  genoMat.SetGenotypeProbAt(0, 1, 0.8);
-  genoMat.SetGenotypeProbAt(0, 2, 0.1);
-  genoMat.SetGenotypeProbAt(1, 0, 0.8);
-  genoMat.SetGenotypeProbAt(1, 1, 0.1);
-  genoMat.SetGenotypeProbAt(1, 2, 0.8);
-  genoMat.SetGenotypeProbAt(2, 0, 0.1);
-  genoMat.SetGenotypeProbAt(2, 1, 0.1);
-  genoMat.SetGenotypeProbAt(2, 2, 0.8);
-  genoMat.SetGenotypeProbAt(3, 0, 0.1);
-  genoMat.SetGenotypeProbAt(3, 1, 0.1);
-  genoMat.SetGenotypeProbAt(3, 2, 0.8);
-  genoMat.SetGenotypeProbAt(4, 0, 0.3);
-  genoMat.SetGenotypeProbAt(4, 1, 0.8);
-  genoMat.SetGenotypeProbAt(4, 2, 0.1);
-
-  const int SZ_DOUBLETS = 2;
-  ScistDoubletSearch sds(genoMat, SZ_DOUBLETS);
-  sds.Search();
+void ScistDoubletTest()
+{
+    ScistHaplotypeMat genoMat;
+    const int numSCs = 5, numSites = 3;
+    genoMat.SetSize(numSCs, numSites);
+    genoMat.SetGenotypeAt(0, 0, 0);
+    genoMat.SetGenotypeAt(0, 1, 0);
+    genoMat.SetGenotypeAt(0, 2, 1);
+    genoMat.SetGenotypeAt(1, 0, 0);
+    genoMat.SetGenotypeAt(1, 1, 1);
+    genoMat.SetGenotypeAt(1, 2, 0);
+    genoMat.SetGenotypeAt(2, 0, 1);
+    genoMat.SetGenotypeAt(2, 1, 1);
+    genoMat.SetGenotypeAt(2, 2, 0);
+    genoMat.SetGenotypeAt(3, 0, 1);
+    genoMat.SetGenotypeAt(3, 1, 1);
+    genoMat.SetGenotypeAt(3, 2, 0);
+    genoMat.SetGenotypeAt(4, 0, 1);
+    genoMat.SetGenotypeAt(4, 1, 0);
+    genoMat.SetGenotypeAt(4, 2, 1);
+
+    //genoMat.SetSize(numSCs, numSites);
+    genoMat.SetGenotypeProbAt(0, 0, 0.8);
+    genoMat.SetGenotypeProbAt(0, 1, 0.8);
+    genoMat.SetGenotypeProbAt(0, 2, 0.1);
+    genoMat.SetGenotypeProbAt(1, 0, 0.8);
+    genoMat.SetGenotypeProbAt(1, 1, 0.1);
+    genoMat.SetGenotypeProbAt(1, 2, 0.8);
+    genoMat.SetGenotypeProbAt(2, 0, 0.1);
+    genoMat.SetGenotypeProbAt(2, 1, 0.1);
+    genoMat.SetGenotypeProbAt(2, 2, 0.8);
+    genoMat.SetGenotypeProbAt(3, 0, 0.1);
+    genoMat.SetGenotypeProbAt(3, 1, 0.1);
+    genoMat.SetGenotypeProbAt(3, 2, 0.8);
+    genoMat.SetGenotypeProbAt(4, 0, 0.3);
+    genoMat.SetGenotypeProbAt(4, 1, 0.8);
+    genoMat.SetGenotypeProbAt(4, 2, 0.1);
+
+    const int SZ_DOUBLETS = 2;
+    ScistDoubletSearch sds(genoMat, SZ_DOUBLETS);
+    sds.Search();
 
 #if 0
     set<int> setTemplateRows;
diff --git a/trisicell/external/scistree/ScistDoublet.hpp b/trisicell/external/scistree/ScistDoublet.hpp
index e261a56..7eb4397 100644
--- a/trisicell/external/scistree/ScistDoublet.hpp
+++ b/trisicell/external/scistree/ScistDoublet.hpp
@@ -9,10 +9,10 @@
 #ifndef ScistDoublet_hpp
 #define ScistDoublet_hpp
 
-#include <map>
+#include <vector>
 #include <set>
+#include <map>
 #include <string>
-#include <vector>
 
 class ScistGenGenotypeMat;
 class ScistPerfPhyCluster;
@@ -21,139 +21,91 @@ class ScistPerfPhyClusTreeNode;
 // *************************************************************************************
 // DP backtrace info
 
-class ScistDoubletDPTraceback {
+class ScistDoubletDPTraceback // stores two (child index, phase) pairs with plain getters and setters
+{
 public:
-  ScistDoubletDPTraceback();
-  ScistDoubletDPTraceback(const ScistDoubletDPTraceback &rhs);
-  ScistDoubletDPTraceback &operator=(const ScistDoubletDPTraceback &rhs);
-
-  void AddTraceback(int indexChild, int phase);
-  int GetChild1() const { return indexChild1; }
-  int GetPhase1() const { return phaseChild1; }
-  int GetChild2() const { return indexChild2; }
-  int GetPhase2() const { return phaseChild2; }
-  void SetChild1(int c) { indexChild1 = c; }
-  void SetPhase1(int p) { phaseChild1 = p; }
-  void SetChild2(int c) { indexChild2 = c; }
-  void SetPhase2(int p) { phaseChild2 = p; }
+    ScistDoubletDPTraceback();
+    ScistDoubletDPTraceback(const ScistDoubletDPTraceback &rhs);
+    ScistDoubletDPTraceback &operator=(const ScistDoubletDPTraceback &rhs);
+
+    void AddTraceback(int indexChild, int phase);
+    int GetChild1() const { return indexChild1; }
+    int GetPhase1() const { return phaseChild1; }
+    int GetChild2() const { return indexChild2; }
+    int GetPhase2() const { return phaseChild2; }
+    void SetChild1(int c) { indexChild1 = c; }
+    void SetPhase1(int p) { phaseChild1 = p; }
+    void SetChild2(int c) { indexChild2 = c; }
+    void SetPhase2(int p) { phaseChild2 = p; }
 
 private:
-  int indexChild1;
-  int phaseChild1;
-  int indexChild2;
-  int phaseChild2;
+    int indexChild1; // first entry: child index
+    int phaseChild1; // first entry: phase
+    int indexChild2; // second entry: child index
+    int phaseChild2; // second entry: phase
 };
 
 // *************************************************************************************
 // Deal with doublet (single genotype row)
 
-class ScistDoublet {
+class ScistDoublet
+{
 public:
-  ScistDoublet(const ScistGenGenotypeMat &genosInputIn);
-  double EvalGenoDoublet(const std::set<int> &setTemplateRows, int genoDoublet,
-                         std::vector<int> &genoDoublePhase1,
-                         std::vector<int> &genoDoublePhase2) const;
+    ScistDoublet(const ScistGenGenotypeMat &genosInputIn);
+    double EvalGenoDoublet(const std::set<int> &setTemplateRows, int genoDoublet, std::vector<int> &genoDoublePhase1, std::vector<int> &genoDoublePhase2) const; // the two phase vectors are output parameters
 
 private:
-  void ConsDPTblDoubletNodes(
-      const std::map<int, ScistPerfPhyCluster> &setTemplateSites,
-      const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex,
-      int genoDoublet, ScistPerfPhyClusTreeNode *pNodeCurr,
-      std::map<ScistPerfPhyClusTreeNode *,
-               std::vector<std::pair<double, ScistDoubletDPTraceback> > >
-          &mapNodeVals) const;
-  void ConsClustersForTemplates(
-      const std::set<int> &setTemplateRows,
-      std::map<int, ScistPerfPhyCluster> &setTemplateSites,
-      std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex) const;
-  void ConsPhasing(
-      const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex,
-      int genoDoublet, ScistPerfPhyClusTreeNode *pNodeRoot,
-      const std::map<ScistPerfPhyClusTreeNode *,
-                     std::vector<std::pair<double, ScistDoubletDPTraceback> > >
-          &mapNodeVals,
-      std::vector<int> &vecPhasing) const;
-  void TracePhasingAtNode(
-      const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex,
-      int genoDoublet, ScistPerfPhyClusTreeNode *pNodeCurr, int phasingCurr,
-      const std::map<ScistPerfPhyClusTreeNode *,
-                     std::vector<std::pair<double, ScistDoubletDPTraceback> > >
-          &mapNodeVals,
-      std::vector<int> &vecPhasing) const;
-  void ConsPhasingVec(const std::vector<int> &vecPhasing,
-                      std::vector<int> &genoDoublePhase1,
-                      std::vector<int> &genoDoublePhase2) const;
-
-  const ScistGenGenotypeMat &genosInput;
+    void ConsDPTblDoubletNodes(const std::map<int, ScistPerfPhyCluster> &setTemplateSites, const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex, int genoDoublet, ScistPerfPhyClusTreeNode *pNodeCurr, std::map<ScistPerfPhyClusTreeNode *, std::vector<std::pair<double, ScistDoubletDPTraceback>>> &mapNodeVals) const;
+    void ConsClustersForTemplates(const std::set<int> &setTemplateRows, std::map<int, ScistPerfPhyCluster> &setTemplateSites, std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex) const;
+    void ConsPhasing(const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex, int genoDoublet, ScistPerfPhyClusTreeNode *pNodeRoot, const std::map<ScistPerfPhyClusTreeNode *, std::vector<std::pair<double, ScistDoubletDPTraceback>>> &mapNodeVals, std::vector<int> &vecPhasing) const;
+    void TracePhasingAtNode(const std::map<const ScistPerfPhyCluster *, int> &mapClusToSiteIndex, int genoDoublet, ScistPerfPhyClusTreeNode *pNodeCurr, int phasingCurr, const std::map<ScistPerfPhyClusTreeNode *, std::vector<std::pair<double, ScistDoubletDPTraceback>>> &mapNodeVals, std::vector<int> &vecPhasing) const;
+    void ConsPhasingVec(const std::vector<int> &vecPhasing, std::vector<int> &genoDoublePhase1, std::vector<int> &genoDoublePhase2) const;
+
+    const ScistGenGenotypeMat &genosInput; // reference member (not a copy): the matrix passed to the constructor must outlive this object
 };
 
 // *************************************************************************************
 // Deal with doublet (search)
 
-class ScistDoubletSearch {
+class ScistDoubletSearch
+{
 public:
-  ScistDoubletSearch(const ScistGenGenotypeMat &genosInputIn,
-                     int maxDoubletSubsetSzIn);
-  void Search();
-  void SearchInc();
-  void SetDouletCost(double c) { costDoublet = c; }
-  void SetVerbose(bool f) { fVerbose = f; }
-  void SetMutTreeOut(bool f) { fOutputPPWithEdgeLabels = f; }
-  void SetCellNames(const std::vector<std::string> &listCellNamesIn) {
-    listCellNames = listCellNamesIn;
-  }
-  void SetSiteNames(const std::vector<std::string> &listSiteNamesIn) {
-    listSiteNames = listSiteNamesIn;
-  }
-  void SetMutTreeFileName(const std::string &strMutTreeFileNameIn) {
-    this->strMutTreeFileName = strMutTreeFileNameIn;
-  }
-  static void GetNgbrTreesFrom(int numHaps, const std::string &strTree,
-                               std::set<std::string> &setNgbrTrees);
+    ScistDoubletSearch(const ScistGenGenotypeMat &genosInputIn, int maxDoubletSubsetSzIn);
+    void Search();
+    void SearchInc();
+    void SetDouletCost(double c) { costDoublet = c; } // NOTE: "Doulet" is a misspelling of "Doublet"; the name is part of the public interface, so it is left as-is
+    void SetVerbose(bool f) { fVerbose = f; }
+    void SetMutTreeOut(bool f) { fOutputPPWithEdgeLabels = f; }
+    void SetCellNames(const std::vector<std::string> &listCellNamesIn) { listCellNames = listCellNamesIn; }
+    void SetSiteNames(const std::vector<std::string> &listSiteNamesIn) { listSiteNames = listSiteNamesIn; }
+    void SetMutTreeFileName(const std::string &strMutTreeFileNameIn) { this->strMutTreeFileName = strMutTreeFileNameIn; }
+    static void GetNgbrTreesFrom(int numHaps, const std::string &strTree, std::set<std::string> &setNgbrTrees);
 
 private:
-  ScistGenGenotypeMat *
-  EvalGenoDoubletSet(const ScistGenGenotypeMat &matToSearch,
-                     const std::set<int> &setDoubletRows, double &optCost);
-  double FitPerfPhyFor(ScistGenGenotypeMat *pMatCurr,
-                       const std::set<int> &setTemplateRows);
-  double ScoreDoubletRow(ScistGenGenotypeMat *pMatCurr,
-                         const std::set<int> &rowsTemplate, int rowDouble,
-                         std::vector<int> &genoDoublePhase1,
-                         std::vector<int> &genoDoublePhase2);
-  void FindDoubletCandidates(std::set<int> &candidatesDoublet);
-  ScistGenGenotypeMat *
-  InitSearchGenotypes(const ScistGenGenotypeMat &matToSearch,
-                      const std::set<int> &candidatesDoublet,
-                      std::set<int> &setDoubletRows, double &costInit);
-  void UpdateSearchGenotypes(ScistGenGenotypeMat *pMatCurr, int genoDoublet,
-                             const std::vector<int> &genoDoublePhase1,
-                             const std::vector<int> &genoDoublePhase2);
-  void FindOrigImputedGeno(const ScistGenGenotypeMat &genosPhasingRes,
-                           ScistGenGenotypeMat &genosImpute,
-                           std::map<int, std::set<int> > &mapDoublets) const;
-  std::string GetGenoDoubleRowName(const std::string &strName) const;
-  std::string GetNewGenoDoubleRowName(const ScistGenGenotypeMat &matToSearch,
-                                      int index) const;
-  ScistGenGenotypeMat *
-  CreateGnoesWithDouble(const ScistGenGenotypeMat &genosOrig, int indexDobule,
-                        const ScistGenGenotypeMat &genosDoubleInfer) const;
-  double ConsTree(ScistGenGenotypeMat &genosNoDoublets,
-                  std::string &strTreeNW) const;
-  void OutputMutTree(ScistGenGenotypeMat &genosNoDoublets) const;
-  std::string ConvMutTreeStr(const std::string &strTree) const;
-  void FindDoubletHapsInMat(const ScistGenGenotypeMat &genosDbl,
-                            std::set<int> &setHapsDoubles) const;
-  bool IsOverImpute(const ScistGenGenotypeMat &genosDbl) const;
-
-  const ScistGenGenotypeMat &genosInput;
-  int maxDoubletSubsetSz;
-  double costDoublet;
-  bool fVerbose;
-  bool fOutputPPWithEdgeLabels;
-  std::vector<std::string> listCellNames;
-  std::vector<std::string> listSiteNames;
-  std::string strMutTreeFileName;
+    ScistGenGenotypeMat *EvalGenoDoubletSet(const ScistGenGenotypeMat &matToSearch, const std::set<int> &setDoubletRows, double &optCost); // heap-allocated result; may be NULL
+    double FitPerfPhyFor(ScistGenGenotypeMat *pMatCurr, const std::set<int> &setTemplateRows);
+    double ScoreDoubletRow(ScistGenGenotypeMat *pMatCurr, const std::set<int> &rowsTemplate, int rowDouble, std::vector<int> &genoDoublePhase1, std::vector<int> &genoDoublePhase2);
+    void FindDoubletCandidates(std::set<int> &candidatesDoublet);
+    ScistGenGenotypeMat *InitSearchGenotypes(const ScistGenGenotypeMat &matToSearch, const std::set<int> &candidatesDoublet, std::set<int> &setDoubletRows, double &costInit); // heap-allocated result
+    void UpdateSearchGenotypes(ScistGenGenotypeMat *pMatCurr, int genoDoublet, const std::vector<int> &genoDoublePhase1, const std::vector<int> &genoDoublePhase2);
+    void FindOrigImputedGeno(const ScistGenGenotypeMat &genosPhasingRes, ScistGenGenotypeMat &genosImpute, std::map<int, std::set<int>> &mapDoublets) const;
+    std::string GetGenoDoubleRowName(const std::string &strName) const;
+    std::string GetNewGenoDoubleRowName(const ScistGenGenotypeMat &matToSearch, int index) const;
+    ScistGenGenotypeMat *CreateGnoesWithDouble(const ScistGenGenotypeMat &genosOrig, int indexDobule, const ScistGenGenotypeMat &genosDoubleInfer) const;
+    double ConsTree(ScistGenGenotypeMat &genosNoDoublets, std::string &strTreeNW) const;
+    void OutputMutTree(ScistGenGenotypeMat &genosNoDoublets) const;
+    std::string ConvMutTreeStr(const std::string &strTree) const;
+    void FindDoubletHapsInMat(const ScistGenGenotypeMat &genosDbl, std::set<int> &setHapsDoubles) const;
+    bool IsOverImpute(const ScistGenGenotypeMat &genosDbl) const;
+
+    const ScistGenGenotypeMat &genosInput; // reference member (not a copy): the matrix passed to the constructor must outlive this object
+    int maxDoubletSubsetSz;
+    double costDoublet; // per-doublet-row cost multiplier used by EvalGenoDoubletSet
+    bool fVerbose;
+    bool fOutputPPWithEdgeLabels;
+    std::vector<std::string> listCellNames;
+    std::vector<std::string> listSiteNames; // when empty, ConvMutTreeStr returns its input unchanged
+    std::string strMutTreeFileName;
 };
 
 // *************************************************************************************
diff --git a/trisicell/external/scistree/ScistErrRateInf.cpp b/trisicell/external/scistree/ScistErrRateInf.cpp
index 2db4a7c..bef1b79 100644
--- a/trisicell/external/scistree/ScistErrRateInf.cpp
+++ b/trisicell/external/scistree/ScistErrRateInf.cpp
@@ -18,105 +18,103 @@ const double DEF_RATE_FN_MAX = 0.5;
 const double DEF_RATE_FP_MIN = 0.0000001;
 const double DEF_RATE_FP_MAX = 0.05;
 
-ScistErrRateInf ::ScistErrRateInf(ScistGenGenotypeMat &genos)
-    : genosInput(genos), rateFNMin(DEF_RATE_FN_MIN), rateFNMax(DEF_RATE_FN_MAX),
-      rateFPMin(DEF_RATE_FP_MIN), rateFPMax(DEF_RATE_FP_MAX), fVerbose(false) {
-  //
-  rateFNOpt = 0.5 * (rateFNMin + rateFNMax);
-  rateFPOpt = 0.5 * (rateFPMin + rateFPMax);
+ScistErrRateInf ::ScistErrRateInf(ScistGenGenotypeMat &genos) : genosInput(genos), rateFNMin(DEF_RATE_FN_MIN), rateFNMax(DEF_RATE_FN_MAX), rateFPMin(DEF_RATE_FP_MIN), rateFPMax(DEF_RATE_FP_MAX), fVerbose(false)
+{
+    // Start both rate estimates at the midpoint of their [min, max] ranges; Infer() refines them from there.
+    rateFNOpt = 0.5 * (rateFNMin + rateFNMax);
+    rateFPOpt = 0.5 * (rateFPMin + rateFPMax);
 }
 
-void ScistErrRateInf ::Infer() {
-  // EM algorithm.
-  const double THRES_LARGER_RATIO = 1.05;
-  double likeliMaxAll = -1.0 * HAP_MAX_INT;
-  while (true) {
-    // now search for rateFP then we are done
-    std::set<std::pair<std::pair<int, int>, int> > listChangedPlaces;
-    double likeliMax2 =
-        CalcMaxProbFor(this->rateFNOpt, this->rateFPOpt, listChangedPlaces);
+void ScistErrRateInf ::Infer()
+{
+    // EM algorithm.
+    const double THRES_LARGER_RATIO = 1.05;
+    double likeliMaxAll = -1.0 * HAP_MAX_INT;
+    while (true)
+    {
+        // now search for rateFP then we are done
+        std::set<std::pair<std::pair<int, int>, int>> listChangedPlaces;
+        double likeliMax2 = CalcMaxProbFor(this->rateFNOpt, this->rateFPOpt, listChangedPlaces);
 
-    if (fVerbose) {
-      cout << "Current likelihood for optimizing false positive rate is "
-           << likeliMax2 << ", FN estimate: " << this->rateFNOpt
-           << ", FP estimate: " << this->rateFPOpt << endl;
-    }
-    if (NumericalAlgoUtils::IsLikeliSignificantlyLargeThresNum(
-            likeliMax2, likeliMaxAll, 1, THRES_LARGER_RATIO) == false) {
-      break;
+        if (fVerbose)
+        {
+            cout << "Current likelihood for optimizing false positive rate is " << likeliMax2 << ", FN estimate: " << this->rateFNOpt << ", FP estimate: " << this->rateFPOpt << endl;
+        }
+        if (NumericalAlgoUtils::IsLikeliSignificantlyLargeThresNum(likeliMax2, likeliMaxAll, 1, THRES_LARGER_RATIO) == false)
+        {
+            break;
+        }
+        likeliMaxAll = likeliMax2;
+        UpdateEstimates(listChangedPlaces);
     }
-    likeliMaxAll = likeliMax2;
-    UpdateEstimates(listChangedPlaces);
-  }
 
-  cout << "Optimal false negative rate is " << this->rateFNOpt
-       << ", and optimal false positive rate is " << this->rateFPOpt << endl;
+    cout << "Optimal false negative rate is " << this->rateFNOpt << ", and optimal false positive rate is " << this->rateFPOpt << endl;
 }
 
-double ScistErrRateInf ::CalcMaxProbFor(
-    double rateFN, double rateFP,
-    std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces) {
-  // cout << "rateFN: " << rateFN << ", rateFP: " << rateFP << endl;
-  //
-  ScistGenGenotypeMat *pGenosMatTest = genosInput.Copy();
+double ScistErrRateInf ::CalcMaxProbFor(double rateFN, double rateFP, std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces)
+{
+    //cout << "rateFN: " << rateFN << ", rateFP: " << rateFP << endl;
+    //
+    ScistGenGenotypeMat *pGenosMatTest = genosInput.Copy();
 
-  // setup prob based on the rate
-  for (int s = 0; s < genosInput.GetNumSites(); ++s) {
-    for (int c = 0; c < genosInput.GetNumHaps(); ++c) {
-      int allele = genosInput.GetGenotypeAt(c, s);
-      double prob0 = 1.0 - rateFN;
-      if (allele == 1) {
-        prob0 = rateFP;
-      }
-      // cout << "Setting cell " << c << ", site " << s << ", prob0: " << prob0
-      // << endl;
+    // setup prob based on the rate
+    for (int s = 0; s < genosInput.GetNumSites(); ++s)
+    {
+        for (int c = 0; c < genosInput.GetNumHaps(); ++c)
+        {
+            int allele = genosInput.GetGenotypeAt(c, s);
+            double prob0 = 1.0 - rateFN;
+            if (allele == 1)
+            {
+                prob0 = rateFP;
+            }
+            //cout << "Setting cell " << c << ", site " << s << ", prob0: " << prob0 << endl;
 
-      pGenosMatTest->SetGenotypeProbAt(c, s, prob0);
+            pGenosMatTest->SetGenotypeProbAt(c, s, prob0);
+        }
     }
-  }
-  // cout << "Genotype matrix to test: " << endl;
-  // pGenosMatTest->Dump();
+    //cout << "Genotype matrix to test: " << endl;
+    //pGenosMatTest->Dump();
 
-  double probMax = CalcMaxProbForMat(*pGenosMatTest, listChangedPlaces);
-  // cout << "For rateFN: " << rateFN << ", rateFP: " << rateFP << "
-  // CalcMaxProbFor: " << probMax << endl;
+    double probMax = CalcMaxProbForMat(*pGenosMatTest, listChangedPlaces);
+    //cout << "For rateFN: " << rateFN << ", rateFP: " << rateFP << " CalcMaxProbFor: " << probMax << endl;
 
-  delete pGenosMatTest;
+    delete pGenosMatTest;
 
-  return probMax;
+    return probMax;
 }
 
-double ScistErrRateInf ::CalcMaxProbForMat(
-    ScistGenGenotypeMat &genosTest,
-    std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces) {
-  //
-  ScistPerfPhyMLE phInf1(genosTest);
-  phInf1.SetVerbose(false);
-  phInf1.SetOutput(false);
-  double res = phInf1.Infer(&listChangedPlaces);
-  // cout << "In CalcMaxProbForMat: prob=" << res << ", matrix: \n";
-  // genosTest.Dump();
-  return res;
+double ScistErrRateInf ::CalcMaxProbForMat(ScistGenGenotypeMat &genosTest, std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces)
+{
+    // Run ScistPerfPhyMLE on genosTest with verbose and output switched off; return Infer()'s value, passing listChangedPlaces for it to fill.
+    ScistPerfPhyMLE phInf1(genosTest);
+    phInf1.SetVerbose(false);
+    phInf1.SetOutput(false);
+    double res = phInf1.Infer(&listChangedPlaces);
+    //cout << "In CalcMaxProbForMat: prob=" << res << ", matrix: \n";
+    //genosTest.Dump();
+    return res;
 }
 
-void ScistErrRateInf ::UpdateEstimates(
-    const std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces) {
-  //
-  int num0to1 = 0, num1to0 = 0;
-  for (set<pair<pair<int, int>, int> >::const_iterator it =
-           listChangedPlaces.begin();
-       it != listChangedPlaces.end(); ++it) {
+void ScistErrRateInf ::UpdateEstimates(const std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces)
+{
     //
-    if (it->second == 0) {
-      ++num1to0;
-    } else {
-      ++num0to1;
+    int num0to1 = 0, num1to0 = 0;
+    for (set<pair<pair<int, int>, int>>::const_iterator it = listChangedPlaces.begin(); it != listChangedPlaces.end(); ++it)
+    {
+        //
+        if (it->second == 0)
+        {
+            ++num1to0;
+        }
+        else
+        {
+            ++num0to1;
+        }
     }
-  }
-  int num0Tot = this->genosInput.GetGenotypeNumOf(0);
-  int num1Tot = this->genosInput.GetGenotypeNumOf(1);
-  // cout << "In UpdateEsimate: num0to1: " << num0to1 << ", num1to0: " <<
-  // num1to0 << ", num0Tot: " << num0Tot << ", num1Tot: " << num1Tot << endl;
-  this->rateFNOpt = ((double)(num0to1 + 1)) / (num0to1 + num1Tot + 2);
-  this->rateFPOpt = ((double)(num1to0 + 1)) / (num1to0 + num0Tot + 2);
+    int num0Tot = this->genosInput.GetGenotypeNumOf(0);
+    int num1Tot = this->genosInput.GetGenotypeNumOf(1);
+    //cout << "In UpdateEsimate: num0to1: " << num0to1 << ", num1to0: " << num1to0 << ", num0Tot: " << num0Tot << ", num1Tot: " << num1Tot << endl;
+    this->rateFNOpt = ((double)(num0to1 + 1)) / (num0to1 + num1Tot + 2);
+    this->rateFPOpt = ((double)(num1to0 + 1)) / (num1to0 + num0Tot + 2);
 }
diff --git a/trisicell/external/scistree/ScistErrRateInf.hpp b/trisicell/external/scistree/ScistErrRateInf.hpp
index 3388eda..a057791 100644
--- a/trisicell/external/scistree/ScistErrRateInf.hpp
+++ b/trisicell/external/scistree/ScistErrRateInf.hpp
@@ -15,30 +15,26 @@
 // *************************************************************************************
 // Inf error rate
 
-class ScistErrRateInf {
+class ScistErrRateInf
+{
 public:
-  ScistErrRateInf(ScistGenGenotypeMat &genos);
-  void Infer();
-  void SetVerbose(bool f) { fVerbose = f; }
+    ScistErrRateInf(ScistGenGenotypeMat &genos);
+    void Infer();
+    void SetVerbose(bool f) { fVerbose = f; }
 
 private:
-  double CalcMaxProbFor(
-      double rateFN, double rateFP,
-      std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces);
-  double CalcMaxProbForMat(
-      ScistGenGenotypeMat &genosTest,
-      std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces);
-  void UpdateEstimates(
-      const std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces);
+    double CalcMaxProbFor(double rateFN, double rateFP, std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces);
+    double CalcMaxProbForMat(ScistGenGenotypeMat &genosTest, std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces);
+    void UpdateEstimates(const std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces);
 
-  ScistGenGenotypeMat &genosInput;
-  double rateFNMin;
-  double rateFNMax;
-  double rateFPMin;
-  double rateFPMax;
-  double rateFNOpt;
-  double rateFPOpt;
-  bool fVerbose;
+    ScistGenGenotypeMat &genosInput;
+    double rateFNMin;
+    double rateFNMax;
+    double rateFPMin;
+    double rateFPMax;
+    double rateFNOpt;
+    double rateFPOpt;
+    bool fVerbose;
 };
 
 #endif /* ScistErrRateInf_hpp */
diff --git a/trisicell/external/scistree/ScistGenotype.cpp b/trisicell/external/scistree/ScistGenotype.cpp
index a8617e5..78b114a 100644
--- a/trisicell/external/scistree/ScistGenotype.cpp
+++ b/trisicell/external/scistree/ScistGenotype.cpp
@@ -7,1170 +7,1196 @@
 //
 
 #include "ScistGenotype.hpp"
-#include "MarginalTree.h"
-#include "PhylogenyTree.h"
-#include "RerootTreeUtils.h"
-#include "TreeBuilder.h"
 #include "Utils3.h"
-#include "Utils4.h"
 #include <cmath>
 #include <iomanip>
+#include "PhylogenyTree.h"
+#include "TreeBuilder.h"
+#include "MarginalTree.h"
+#include "Utils4.h"
+#include "RerootTreeUtils.h"
 
 // *************************************************************************************
 // genotypes: integer matrix
 
-ScistGenGenotypeMat::ScistGenGenotypeMat() : thresSignifcant(0.0) {}
-
-void ScistGenGenotypeMat ::TrimCliquesMaxDiff(
-    std::set<std::set<int> > &listCliques, int maxToKeep) const {
-  // cout << "Entering trim, number of cliques: " << listCliques.size() << ",
-  // maxToKeep: " << maxToKeep << endl;
-  // keep only the most different ones
-  if ((int)listCliques.size() <= maxToKeep) {
-    return;
-  }
-  // find the distance between two sets
-  map<pair<const set<int> *, const set<int> *>, int> mapPairCliqueDiff;
-  for (set<set<int> >::iterator it1 = listCliques.begin();
-       it1 != listCliques.end(); ++it1) {
-    set<set<int> >::iterator it2 = it1;
-    ++it2;
-    for (; it2 != listCliques.end(); ++it2) {
-      //
-      set<int> sint;
-      JoinSets(*it1, *it2, sint);
-      pair<const set<int> *, const set<int> *> pp1(&(*it1), &(*it2)),
-          pp2(&(*it2), &(*it1));
-      mapPairCliqueDiff[pp1] = it1->size() + it2->size() - 2 * sint.size();
-      mapPairCliqueDiff[pp2] = mapPairCliqueDiff[pp1];
-    }
-  }
-
-  // increamentally add the most different; first add the first clique
-  set<const set<int> *> listCliquesNext;
-  listCliquesNext.insert(&(*listCliques.begin()));
-  while ((int)listCliquesNext.size() < maxToKeep) {
-    const set<int> *pcliqueToAdd = NULL;
-    int diffMax = 0;
-    for (set<set<int> >::iterator it1 = listCliques.begin();
-         it1 != listCliques.end(); ++it1) {
-      //
-      if (listCliquesNext.find(&(*it1)) != listCliquesNext.end()) {
-        //
-        continue;
-      }
-
-      //
-      int diffCurr = 0;
-      for (set<const set<int> *>::iterator it2 = listCliquesNext.begin();
-           it2 != listCliquesNext.end(); ++it2) {
-        pair<const set<int> *, const set<int> *> pp(*it2, &(*it1));
-        YW_ASSERT_INFO(mapPairCliqueDiff.find(pp) != mapPairCliqueDiff.end(),
-                       "Fail to find");
-        diffCurr += mapPairCliqueDiff[pp];
-      }
-      if (diffCurr > diffMax) {
-        diffMax = diffCurr;
-        pcliqueToAdd = &(*it1);
-      }
-    }
-    YW_ASSERT_INFO(pcliqueToAdd != NULL, "Cannot be null");
-    listCliquesNext.insert(pcliqueToAdd);
-    // cout << "In TrimCliquesMaxDiff: adding clique: ";
-    // DumpIntSet(*pcliqueToAdd);
-  }
-  set<set<int> > listCliquesNextUse;
-  for (set<const set<int> *>::iterator it = listCliquesNext.begin();
-       it != listCliquesNext.end(); ++it) {
-    listCliquesNextUse.insert(*(*it));
-  }
-  listCliques = listCliquesNextUse;
-}
-
-ScistGenGenotypeMat *
-ScistGenGenotypeMat ::SubMatrix(const std::set<int> &setRows,
-                                const std::set<int> &setSites) const {
-  ScistGenGenotypeMat *pMatNew = CreateNewMat();
-  pMatNew->SetSize(setRows.size(), setSites.size());
-  // set row name
-  int rowCurr = 0;
-  for (set<int>::iterator it = setRows.begin(); it != setRows.end(); ++it) {
-    int siteCurr = 0;
-    for (set<int>::iterator it2 = setSites.begin(); it2 != setSites.end();
-         ++it2) {
-      pMatNew->SetGenotypeAt(rowCurr, siteCurr, GetGenotypeAt(*it, *it2));
-      pMatNew->SetGenotypeProbAt(rowCurr, siteCurr,
-                                 GetGenotypeProbAllele0At(*it, *it2));
-      ++siteCurr;
-    }
-
-    pMatNew->SetGenotypeName(rowCurr, GetGenotypeName(*it));
-    ++rowCurr;
-  }
-  return pMatNew;
-}
-
-std::string ScistGenGenotypeMat ::ConsNJTree() const {
-  //
-  PhyloDistance dist;
-  // setup pairwise hamming distance
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    for (int j = i + 1; j < GetNumHaps(); ++j) {
-      //
-      double d = CalcHammingDistBetwHaps(i, j);
-      dist.SetDistance(i, j, d);
-      // cout << "Distance between (" << i << "," << j << "): " << d << endl;
-    }
-  }
-  DistanceTreeBuilder dtb(dist);
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    int indexUse = i + 1;
-    string strIndexToUse = std::to_string(indexUse);
-    dtb.SetTaxonName(i, strIndexToUse);
-  }
-  return dtb.NJ();
-}
-
-std::string ScistGenGenotypeMat ::ConsNJTreeZeroRoot() const {
-  //
-  PhyloDistance dist;
-  // setup pairwise hamming distance
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    for (int j = i + 1; j < GetNumHaps(); ++j) {
-      //
-      double d = CalcHammingDistBetwHaps(i, j);
-      dist.SetDistance(i, j, d);
-      // cout << "Distance between (" << i << "," << j << "): " << d << endl;
-    }
-  }
-  // add one more hap: all-0
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    //
-    double d = 0.0;
-    for (int s = 0; s < GetNumSites(); ++s) {
-      if (GetGenotypeAt(i, s) != 0) {
-        d += 1.0;
-      }
-    }
-    d = d / GetNumSites();
-    dist.SetDistance(i, GetNumHaps(), d);
-  }
-
-  DistanceTreeBuilder dtb(dist);
-  for (int i = 0; i <= GetNumHaps(); ++i) {
-    int indexUse = i + 1;
-    string strIndexToUse = std::to_string(indexUse);
-    dtb.SetTaxonName(i, strIndexToUse);
-  }
-  string strNJWithRoot = dtb.NJ();
-  // cout << "strNJWithRoot: " << strNJWithRoot << endl;
-  // reroot
-  string strIdRoot = std::to_string(GetNumHaps() + 1);
-  char strNJWithRootBuf[102400];
-  strcpy(strNJWithRootBuf, strNJWithRoot.c_str());
-  char strIdRootBuf[102400];
-  strcpy(strIdRootBuf, strIdRoot.c_str());
-  string strNJWithRootReroot = ReRootTreeNewick(strNJWithRootBuf, strIdRootBuf);
-  // cout << "strNJWithRootReroot: " << strNJWithRootReroot << endl;
-  // remove the root
-  MarginalTree mtree;
-  ReadinMarginalTreesNewickWLenString(strNJWithRootReroot,
-                                      this->GetNumHaps() + 1, mtree);
-  mtree.BuildDescendantInfo();
-  int posRootLeaf = mtree.GetPosForLabel(this->GetNumHaps() + 1);
-  YW_ASSERT_INFO(posRootLeaf >= 0, "Fail to find the root");
-  mtree.RemoveLeafNodeFromBinaryTree(posRootLeaf);
-  mtree.BuildDescendantInfo();
-  // cout << "Aftre removing reoot: " << mtree.GetNewickSorted(false) << endl;
-  return mtree.GetNewickSorted(false);
-}
-
-std::string ScistGenGenotypeMat ::ConsNJTreeNoInc() const {
-  PhyloDistance dist;
-  // setup pairwise hamming distance
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    for (int j = i + 1; j < GetNumHaps(); ++j) {
-      //
-      double d = CalcHammingDistBetwHaps(i, j);
-      dist.SetDistance(i, j, d);
-      // cout << "Distance between (" << i << "," << j << "): " << d << endl;
-    }
-  }
-  DistanceTreeBuilder dtb(dist);
-  return dtb.NJ();
-}
-
-double ScistGenGenotypeMat ::CalcHammingDistBetwHaps(int h1, int h2) const {
-  int numDiffs = 0;
-  for (int c = 0; c < GetNumSites(); ++c) {
-    if (GetGenotypeAt(h1, c) != GetGenotypeAt(h2, c) &&
-        IsProbAtCellPosSignificant(h1, c, GetSignificanceThres()) &&
-        IsProbAtCellPosSignificant(h2, c, GetSignificanceThres())) {
-      ++numDiffs;
-    }
-  }
-  return (1.0 * numDiffs) / GetNumSites();
-}
-
-void ScistGenGenotypeMat ::ConsCompatMap(
-    std::set<std::pair<int, int> > &setCompatPairs) const {
-  //
-  setCompatPairs.clear();
-  for (int s1 = 0; s1 < GetNumSites(); ++s1) {
-    for (int s2 = s1 + 1; s2 < GetNumSites(); ++s2) {
-      if (IsCompatible(s1, s2)) {
-        pair<int, int> pp(s1, s2);
-        setCompatPairs.insert(pp);
-      }
-    }
-  }
-}
-
-bool ScistGenGenotypeMat ::AreSitesCompatInMap(
-    const std::set<std::pair<int, int> > &setCompatPairs, int s1, int s2) {
-  //
-  pair<int, int> pp(s1, s2);
-  OrderInt(pp.first, pp.second);
-  return setCompatPairs.find(pp) != setCompatPairs.end();
-}
-
-int ScistGenGenotypeMat ::GetGenotypeNumOf(int geno) const {
-  int res = 0;
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    for (int j = 0; j < GetNumSites(); ++j) {
-      if (GetGenotypeAt(i, j) == geno) {
-        ++res;
-      }
-    }
-  }
-  return res;
-}
-
-int ScistGenGenotypeMat ::FindCellByName(const std::string &strName) const {
-  //
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    if (GetGenotypeName(i) == strName) {
-      return i;
-    }
-  }
-  return -1;
-}
-
-void ScistGenGenotypeMat ::Dump() const {
-  cout << "Genotype names: ";
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    cout << GetGenotypeName(i) << "  ";
-  }
-  cout << endl;
-}
-
-void ScistGenGenotypeMat ::ChangeGenosAtPositions(
-    const std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces) {
-  //
-  for (std::set<std::pair<std::pair<int, int>, int> >::const_iterator it =
-           listChangedPlaces.begin();
-       it != listChangedPlaces.end(); ++it) {
-    //
-    SetGenotypeAt(it->first.first, it->first.second, it->second);
-  }
+ScistGenGenotypeMat::ScistGenGenotypeMat() : thresSignifcant(0.0)
+{
 }
 
-// *************************************************************************************
-// genotypes: binary matrix
+void ScistGenGenotypeMat ::TrimCliquesMaxDiff(std::set<std::set<int>> &listCliques, int maxToKeep) const
+{
+    //cout << "Entering trim, number of cliques: " << listCliques.size() << ", maxToKeep: " << maxToKeep << endl;
+    // keep only the most different ones
+    if ((int)listCliques.size() <= maxToKeep)
+    {
+        return;
+    }
+    // find the distance between two sets
+    map<pair<const set<int> *, const set<int> *>, int> mapPairCliqueDiff;
+    for (set<set<int>>::iterator it1 = listCliques.begin(); it1 != listCliques.end(); ++it1)
+    {
+        set<set<int>>::iterator it2 = it1;
+        ++it2;
+        for (; it2 != listCliques.end(); ++it2)
+        {
+            //
+            set<int> sint;
+            JoinSets(*it1, *it2, sint);
+            pair<const set<int> *, const set<int> *> pp1(&(*it1), &(*it2)), pp2(&(*it2), &(*it1));
+            mapPairCliqueDiff[pp1] = it1->size() + it2->size() - 2 * sint.size();
+            mapPairCliqueDiff[pp2] = mapPairCliqueDiff[pp1];
+        }
+    }
 
-ScistHaplotypeMat ::ScistHaplotypeMat() {}
+    // incrementally add the most different cliques; start with the first clique
+    set<const set<int> *> listCliquesNext;
+    listCliquesNext.insert(&(*listCliques.begin()));
+    while ((int)listCliquesNext.size() < maxToKeep)
+    {
+        const set<int> *pcliqueToAdd = NULL;
+        int diffMax = 0;
+        for (set<set<int>>::iterator it1 = listCliques.begin(); it1 != listCliques.end(); ++it1)
+        {
+            //
+            if (listCliquesNext.find(&(*it1)) != listCliquesNext.end())
+            {
+                //
+                continue;
+            }
 
-ScistGenGenotypeMat *ScistHaplotypeMat ::Copy() const {
-  //
-  ScistHaplotypeMat *pMatCopy = new ScistHaplotypeMat();
+            //
+            int diffCurr = 0;
+            for (set<const set<int> *>::iterator it2 = listCliquesNext.begin(); it2 != listCliquesNext.end(); ++it2)
+            {
+                pair<const set<int> *, const set<int> *> pp(*it2, &(*it1));
+                YW_ASSERT_INFO(mapPairCliqueDiff.find(pp) != mapPairCliqueDiff.end(), "Fail to find");
+                diffCurr += mapPairCliqueDiff[pp];
+            }
+            if (diffCurr > diffMax)
+            {
+                diffMax = diffCurr;
+                pcliqueToAdd = &(*it1);
+            }
+        }
+        YW_ASSERT_INFO(pcliqueToAdd != NULL, "Cannot be null");
+        listCliquesNext.insert(pcliqueToAdd);
+        //cout << "In TrimCliquesMaxDiff: adding clique: ";
+        //DumpIntSet(*pcliqueToAdd);
+    }
+    set<set<int>> listCliquesNextUse;
+    for (set<const set<int> *>::iterator it = listCliquesNext.begin(); it != listCliquesNext.end(); ++it)
+    {
+        listCliquesNextUse.insert(*(*it));
+    }
+    listCliques = listCliquesNextUse;
+}
 
-  for (int i = 0; i < GetNumNames(); ++i) {
-    pMatCopy->AddGenotypeName(GetGenotypeName(i));
-  }
+ScistGenGenotypeMat *ScistGenGenotypeMat ::SubMatrix(const std::set<int> &setRows, const std::set<int> &setSites) const
+{
+    ScistGenGenotypeMat *pMatNew = CreateNewMat();
+    pMatNew->SetSize(setRows.size(), setSites.size());
+    // set row name
+    int rowCurr = 0;
+    for (set<int>::iterator it = setRows.begin(); it != setRows.end(); ++it)
+    {
+        int siteCurr = 0;
+        for (set<int>::iterator it2 = setSites.begin(); it2 != setSites.end(); ++it2)
+        {
+            pMatNew->SetGenotypeAt(rowCurr, siteCurr, GetGenotypeAt(*it, *it2));
+            pMatNew->SetGenotypeProbAt(rowCurr, siteCurr, GetGenotypeProbAllele0At(*it, *it2));
+            ++siteCurr;
+        }
 
-  pMatCopy->SetSize(GetNumHaps(), GetNumSites());
+        pMatNew->SetGenotypeName(rowCurr, GetGenotypeName(*it));
+        ++rowCurr;
+    }
+    return pMatNew;
+}
 
-  //
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    for (int j = 0; j < GetNumSites(); ++j) {
-      pMatCopy->SetGenotypeAt(i, j, GetGenotypeAt(i, j));
-      pMatCopy->SetGenotypeProbAt(i, j, GetGenotypeProbAllele0At(i, j));
+std::string ScistGenGenotypeMat ::ConsNJTree() const
+{
+    //
+    PhyloDistance dist;
+    // setup pairwise hamming distance
+    for (int i = 0; i < GetNumHaps(); ++i)
+    {
+        for (int j = i + 1; j < GetNumHaps(); ++j)
+        {
+            //
+            double d = CalcHammingDistBetwHaps(i, j);
+            dist.SetDistance(i, j, d);
+            //cout << "Distance between (" << i << "," << j << "): " << d << endl;
+        }
+    }
+    DistanceTreeBuilder dtb(dist);
+    for (int i = 0; i < GetNumHaps(); ++i)
+    {
+        int indexUse = i + 1;
+        string strIndexToUse = std::to_string(indexUse);
+        dtb.SetTaxonName(i, strIndexToUse);
     }
-  }
+    return dtb.NJ();
+}
 
-  return pMatCopy;
+std::string ScistGenGenotypeMat ::ConsNJTreeZeroRoot() const
+{
+    // Build an NJ tree over all cells plus one extra all-0 taxon, reroot the tree at that taxon, then remove it again.
+    PhyloDistance dist;
+    // setup pairwise hamming distance
+    for (int i = 0; i < GetNumHaps(); ++i)
+    {
+        for (int j = i + 1; j < GetNumHaps(); ++j)
+        {
+            // distance between cells i and j as computed by CalcHammingDistBetwHaps
+            double d = CalcHammingDistBetwHaps(i, j);
+            dist.SetDistance(i, j, d);
+            //cout << "Distance between (" << i << "," << j << "): " << d << endl;
+        }
+    }
+    // add one more hap: all-0
+    for (int i = 0; i < GetNumHaps(); ++i)
+    {
+        // distance from cell i to the all-0 taxon (index GetNumHaps()): fraction of sites with a non-zero genotype
+        double d = 0.0;
+        for (int s = 0; s < GetNumSites(); ++s)
+        {
+            if (GetGenotypeAt(i, s) != 0)
+            {
+                d += 1.0;
+            }
+        }
+        d = d / GetNumSites();
+        dist.SetDistance(i, GetNumHaps(), d);
+    }
+
+    DistanceTreeBuilder dtb(dist);
+    for (int i = 0; i <= GetNumHaps(); ++i)
+    {
+        int indexUse = i + 1;
+        string strIndexToUse = std::to_string(indexUse);
+        dtb.SetTaxonName(i, strIndexToUse);
+    }
+    string strNJWithRoot = dtb.NJ();
+    // reroot at the all-0 taxon; the tree is copied into a fixed-size C buffer, so verify it fits before the unchecked strcpy
+    string strIdRoot = std::to_string(GetNumHaps() + 1);
+    char strNJWithRootBuf[102400];
+    YW_ASSERT_INFO(strNJWithRoot.size() < sizeof(strNJWithRootBuf), "NJ tree string too long for reroot buffer");
+    strcpy(strNJWithRootBuf, strNJWithRoot.c_str());
+    char strIdRootBuf[102400];
+    strcpy(strIdRootBuf, strIdRoot.c_str());
+    string strNJWithRootReroot = ReRootTreeNewick(strNJWithRootBuf, strIdRootBuf);
+    //cout << "strNJWithRootReroot: " << strNJWithRootReroot << endl;
+    // remove the root
+    MarginalTree mtree;
+    ReadinMarginalTreesNewickWLenString(strNJWithRootReroot, this->GetNumHaps() + 1, mtree);
+    mtree.BuildDescendantInfo();
+    int posRootLeaf = mtree.GetPosForLabel(this->GetNumHaps() + 1);
+    YW_ASSERT_INFO(posRootLeaf >= 0, "Fail to find the root");
+    mtree.RemoveLeafNodeFromBinaryTree(posRootLeaf);
+    mtree.BuildDescendantInfo();
+    //cout << "Aftre removing reoot: " << mtree.GetNewickSorted(false) << endl;
+    return mtree.GetNewickSorted(false);
 }
 
-bool ScistHaplotypeMat ::ReadFromFile(std::ifstream &infile, int numSites,
-                                      int numSCs, bool fSiteName) {
-  // cout << "ScistHaplotypeMat :: ReadFromFile: numSites: " << numSites << ",
-  // numSCs: " << numSCs << endl;
-  //
-  // assume each site is independent
-  SetSize(numSCs, numSites);
-  for (int i = 0; i < numSites; ++i) {
-    string strName;
-    if (fSiteName) {
-      infile >> strName;
-    } else {
-      strName = std::to_string(i + 1);
+std::string ScistGenGenotypeMat ::ConsNJTreeNoInc() const
+{
+    PhyloDistance dist;
+    // setup pairwise hamming distance
+    int numberHaps = GetNumHaps();
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        for (int j = i + 1; j < numberHaps; ++j)
+        {
+            //
+            double d = CalcHammingDistBetwHaps(i, j);
+            dist.SetDistance(i, j, d);
+            //cout << "Distance between (" << i << "," << j << "): " << d << endl;
+        }
     }
-    AddSiteName(strName);
+    DistanceTreeBuilder dtb(dist);
+    return dtb.NJ();
+}
 
-    // cout << "Read in site: " << i << endl;
-    for (int j = 0; j < numSCs; ++j) {
-      double prob0 = 0.0;
-      bool res = ReadFromFileHapProb(infile, prob0);
-      if (res == false) {
-        return false;
-      }
-      // choose the allele w/ higher prob
-      int allele = 0;
-      if (prob0 < 0.5) {
-        allele = 1;
-      }
-      SetGenotypeAt(j, i, allele);
+double ScistGenGenotypeMat ::CalcHammingDistBetwHaps(int h1, int h2) const
+{
+    int numDiffs = 0; // sites where h1 and h2 differ and both cells pass the significance threshold
+    int numberSites = GetNumSites();
+    for (int c = 0; c < numberSites; ++c)
+    {
+        if (GetGenotypeAt(h1, c) != GetGenotypeAt(h2, c) && IsProbAtCellPosSignificant(h1, c, GetSignificanceThres()) && IsProbAtCellPosSignificant(h2, c, GetSignificanceThres()))
+        {
+            ++numDiffs;
+        }
+    }
+    return (1.0 * numDiffs) / numberSites; // normalized by the total number of sites, not only the significant ones
+}
 
-      matHaplotypesProb0[j][i] = prob0;
+void ScistGenGenotypeMat ::ConsCompatMap(std::set<std::pair<int, int>> &setCompatPairs) const
+{
+    //
+    setCompatPairs.clear();
+    int numberSites = GetNumSites();
+    for (int s1 = 0; s1 < numberSites; ++s1)
+    {
+        for (int s2 = s1 + 1; s2 < numberSites; ++s2)
+        {
+            if (IsCompatible(s1, s2))
+            {
+                std::pair<int, int> pp(s1, s2);
+                setCompatPairs.insert(pp);
+            }
+        }
     }
-  }
+}
 
-  // cout << "Input matrix: ";
-  // this->matHaplotypes.Dump();
+bool ScistGenGenotypeMat ::AreSitesCompatInMap(const std::set<std::pair<int, int>> &setCompatPairs, int s1, int s2)
+{
+    // Look up (s1, s2) in setCompatPairs after OrderInt; presumably OrderInt sorts the two indices ascending to match the (smaller, larger) pairs ConsCompatMap inserts -- TODO confirm.
+    std::pair<int, int> pp(s1, s2);
+    OrderInt(pp.first, pp.second);
+    return setCompatPairs.find(pp) != setCompatPairs.end();
+}
 
-  return true;
+// Count the cells of the (haps x sites) genotype matrix whose genotype equals geno.
+int ScistGenGenotypeMat ::GetGenotypeNumOf(int geno) const
+{
+    int res = 0;
+    const int numberHaps = GetNumHaps();
+    const int numberSites = GetNumSites(); // the inner loop walks sites, so it must be bounded by sites, not haps
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        for (int j = 0; j < numberSites; ++j)
+        {
+            if (GetGenotypeAt(i, j) == geno)
+                ++res;
+        }
+    }
+    return res;
 }
 
-bool ScistHaplotypeMat ::ReadFromFileHapProb(std::ifstream &infile,
-                                             double &prob0) {
-  // read in the prob of haploid allele: 0.6 means prob of 0 is 0.6
-  // assume prob of 0 + prob of 1 = 1
-  infile >> prob0;
-  return true;
+int ScistGenGenotypeMat ::FindCellByName(const std::string &strName) const
+{
+    // Linear scan over the genotype (cell) names: return the index of the first row named strName, or -1 if none matches.
+    int numberHaps = GetNumHaps();
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        if (GetGenotypeName(i) == strName)
+        {
+            return i;
+        }
+    }
+    return -1;
 }
 
-void ScistHaplotypeMat ::SetSize(int numHaps, int numSites) {
-  matHaplotypes.SetSize(numHaps, numSites);
+void ScistGenGenotypeMat ::Dump() const
+{
+    cout << "Genotype names: "; // this dump prints only the row names, on a single line
+    int numberHaps = GetNumHaps();
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        cout << GetGenotypeName(i) << "  ";
+    }
+    cout << endl;
+}
 
-  matHaplotypesProb0.clear();
-  matHaplotypesProb0.resize(numHaps);
+void ScistGenGenotypeMat ::ChangeGenosAtPositions(const std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces)
+{
+    // Apply every recorded change; each entry is ((first index, second index), new genotype).
+    for (std::set<std::pair<std::pair<int, int>, int>>::const_iterator it = listChangedPlaces.begin(); it != listChangedPlaces.end(); ++it)
+    {
+        // forwarded as SetGenotypeAt(first index, second index, new genotype)
+        SetGenotypeAt(it->first.first, it->first.second, it->second);
+    }
+}
 
-  bool fNameInit = GetNumNames() > 0;
+// *************************************************************************************
+// genotypes: binary matrix
 
-  for (int i = 0; i < numHaps; ++i) {
-    matHaplotypesProb0[i].resize(numSites);
+ScistHaplotypeMat ::ScistHaplotypeMat()
+{
+}
 
-    // by default, use the numericals, starting from one
-    if (fNameInit == false) {
-      string str = std::to_string(i + 1);
-      AddGenotypeName(str);
+ScistGenGenotypeMat *ScistHaplotypeMat ::Copy() const
+{
+    //
+    ScistHaplotypeMat *pMatCopy = new ScistHaplotypeMat();
 
-      // cout << "Init name: " << str << endl;
+    int numberNames = GetNumNames();
+    int numberHaps = GetNumHaps();
+    int numberSites = GetNumSites();
+    for (int i = 0; i < numberNames; ++i)
+    {
+        pMatCopy->AddGenotypeName(GetGenotypeName(i));
     }
-  }
-}
 
-void ScistHaplotypeMat ::SetGenotypeAt(int sc, int site, int geno) {
-  matHaplotypes(sc, site) = geno;
-}
+    pMatCopy->SetSize(numberHaps, GetNumSites());
 
-void ScistHaplotypeMat ::AddGenotypeAt(int sc, int site, int geno) {
-  // append the genotype into it
-  int genoThis = GetGenotypeAt(sc, site);
-  if (genoThis == 0 && geno == 1) {
-    SetGenotypeAt(sc, site, 1);
-  }
+    //
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        for (int j = 0; j < numberSites; ++j)
+        {
+            pMatCopy->SetGenotypeAt(i, j, GetGenotypeAt(i, j));
+            pMatCopy->SetGenotypeProbAt(i, j, GetGenotypeProbAllele0At(i, j));
+        }
+    }
+
+    return pMatCopy;
 }
 
-int ScistHaplotypeMat ::GetAltGenotypeAt(int sc, int site) const {
-  int genoThis = GetGenotypeAt(sc, site);
-  if (genoThis == 0) {
-    return 1;
-  } else {
-    return 0;
-  }
-}
-
-double ScistHaplotypeMat ::GetGenotypeProbAllele0At(int sc, int site) const {
-  // return proble of allele 0
-  return this->matHaplotypesProb0[sc][site];
-}
-
-void ScistHaplotypeMat ::SetGenotypeProbAt(int sc, int site, double prob) {
-  this->matHaplotypesProb0[sc][site] = prob;
-}
-
-void ScistHaplotypeMat ::SetGenotypeProbOfGenoAt(int sc, int site, int geno,
-                                                 double prob) {
-  if (geno == 0) {
-    SetGenotypeProbAt(sc, site, prob);
-  } else {
-    SetGenotypeProbAt(sc, site, 1.0 - prob);
-  }
-}
-
-int ScistHaplotypeMat ::GetGenotypeAt(int sc, int site) const {
-  return matHaplotypes(sc, site);
-}
-
-void ScistHaplotypeMat ::FindMaximalCompatSites(
-    const std::vector<double> &wtSites,
-    std::vector<std::map<int, std::set<int> > > &listSetSitesCompat,
-    int maxNumSets,
-    const std::set<std::pair<int, int> > *pSetCompatPairs) const {
-  //#if 0
-  // const double DEF_MIN_FRAC = 0.5;
-
-  // we find the maximum weightd clique of compatible pairs
-  // construct compat pairs if not done yet
-  set<std::pair<int, int> > *pSetCompatPairsUse =
-      const_cast<set<std::pair<int, int> > *>(pSetCompatPairs);
-  set<pair<int, int> > setCompatPairsAlt;
-  if (pSetCompatPairsUse == NULL) {
-    ConsCompatMap(setCompatPairsAlt);
-    pSetCompatPairsUse = &setCompatPairsAlt;
-  }
-
-  // implement the simple heuristics by Johnson 1974
-  // BinaryMatrix &matHaplotypesUse = const_cast<BinaryMatrix &>(
-  // this->matHaplotypes );
-
-  //
-  listSetSitesCompat.clear();
-  // vector<vector<bool> > vecHapsFullyCompat( GetNumSites() );
-  // for(int i=0; i<GetNumSites(); ++i)
-  //{
-  //    vecHapsFullyCompat[i].resize( GetNumSites() );
-  //}
-
-  // for(int s1 = 0; s1<GetNumSites(); ++s1)
-  //{
-  //    vecHapsFullyCompat[s1][s1] = true;
-  //    for(int s2=s1+1; s2<GetNumSites(); ++s2)
-  //    {
-  //        // root allele: 0
-  //        bool fCompat = matHaplotypesUse.IsCompatibleRooted(s1, s2, 0, 0);
-  //        vecHapsFullyCompat[s1][s2] = fCompat;
-  //        vecHapsFullyCompat[s2][s1] = fCompat;
-  // cout << "Sites " << s1 << "," << s2 << ": ";
-  // if(fCompat)
-  //{
-  // cout << " compatible\n";
-  //}
-  // else
-  //{
-  // cout << " not compatible\n";
-  //}
-  //    }
-  //}
-
-  //
-  set<pair<set<int>, set<int> > > listSetMaxCompatChosen;
-  // init
-  set<int> ss;
-  set<int> setSitesRemainInit;
-  PopulateSetWithInterval(setSitesRemainInit, 0, GetNumSites() - 1);
-  pair<set<int>, set<int> > pp(ss, setSitesRemainInit);
-  listSetMaxCompatChosen.insert(pp);
-
-  while (true) {
+bool ScistHaplotypeMat ::ReadFromFile(std::ifstream &infile, int numSites, int numSCs, bool fSiteName)
+{
+    //cout << "ScistHaplotypeMat :: ReadFromFile: numSites: " << numSites << ", numSCs: " << numSCs << endl;
     //
-    set<pair<set<int>, set<int> > > listSetMaxCompatChosenNext;
-
-    for (set<pair<set<int>, set<int> > >::iterator it =
-             listSetMaxCompatChosen.begin();
-         it != listSetMaxCompatChosen.end(); ++it) {
-      set<int> setSitesRemain = it->second;
-
-      if (setSitesRemain.size() == 0) {
-        continue;
-      }
-
-      set<int> setMaxCompatChosen = it->first;
-
-      // find the one that is the most compatible with remaining sites
-      // int maxNumCompat = -1;
-      double wtSiteMax = -1.0 * HAP_MAX_INT;
-
-      vector<int> listSitesNext;
-      for (set<int>::iterator it = setSitesRemain.begin();
-           it != setSitesRemain.end(); ++it) {
-        //    int numCompat = 0;
-        //    for(set<int> :: iterator it2 = setSitesRemain.begin(); it2 !=
-        //    setSitesRemain.end(); ++it2)
-        //    {
-        //        if( AreSitesCompatInMap(*pSetCompatPairsUse, *it,*it2) )
-        //        {
-        //            ++numCompat;
-        //        }
-        //    }
-        double wtCur = wtSites[*it];
-        //    if( numCompat > maxNumCompat )
-        if (wtCur > wtSiteMax) {
-          listSitesNext.clear();
-          listSitesNext.push_back(*it);
-          wtSiteMax = wtCur;
-          // maxNumCompat = numCompat;
+    // assume each site is independent
+    SetSize(numSCs, numSites);
+    for (int i = 0; i < numSites; ++i)
+    {
+        string strName;
+        if (fSiteName)
+        {
+            infile >> strName;
         }
-        //    else if( numCompat == maxNumCompat )
-        if (wtCur == wtSiteMax) {
-          listSitesNext.push_back(*it);
+        else
+        {
+            strName = std::to_string(i + 1);
         }
-      }
-
-      // if weight is too small now, stop if we have already get enough
-      // if( wtSiteMax < 1.0)
-      //{
-      //    if( ((int)DEF_MIN_FRAC*GetNumSites()) <=
-      //    (int)setMaxCompatChosen.size() )
-      //    {
-      //        break;
-      //    }
-      //}
-
-      for (int jj = 0; jj < (int)listSitesNext.size(); ++jj) {
-        // don't continue adding if we are at the limit
-        if ((int)listSetMaxCompatChosenNext.size() > maxNumSets) {
-          continue;
+        AddSiteName(strName);
+
+        //cout << "Read in site: " << i << endl;
+        for (int j = 0; j < numSCs; ++j)
+        {
+            double prob0 = 0.0;
+            bool res = ReadFromFileHapProb(infile, prob0);
+            if (res == false)
+            {
+                return false;
+            }
+            // choose the allele w/ higher prob
+            int allele = 0;
+            if (prob0 < 0.5)
+            {
+                allele = 1;
+            }
+            SetGenotypeAt(j, i, allele);
+
+            matHaplotypesProb0[j][i] = prob0;
         }
+    }
+
+    return true;
+}
+
+bool ScistHaplotypeMat ::ReadFromFileHapProb(std::ifstream &infile, double &prob0)
+{
+    // read one value from infile into prob0: the prob of haploid allele 0 (0.6 means prob of 0 is 0.6)
+    // assume prob of 0 + prob of 1 = 1. NOTE: the stream state is not checked, so this always returns true
+    infile >> prob0;
+    return true;
+}
+
+void ScistHaplotypeMat ::SetSize(int numHaps, int numSites)
+{
+    matHaplotypes.SetSize(numHaps, numSites);
 
-        int sChose = listSitesNext[jj];
-        set<int> setMaxCompatChosenNew = setMaxCompatChosen;
-        setMaxCompatChosenNew.insert(sChose);
-
-        // remove any sites that are incompatible with the chosen sites
-        set<int> setSitesRemainNew;
-        for (set<int>::iterator it = setSitesRemain.begin();
-             it != setSitesRemain.end(); ++it) {
-          if (AreSitesCompatInMap(*pSetCompatPairsUse, sChose, *it) == true) {
-            setSitesRemainNew.insert(*it);
-          }
+    matHaplotypesProb0.clear();
+    matHaplotypesProb0.resize(numHaps);
+
+    bool fNameInit = GetNumNames() > 0;
+
+    for (int i = 0; i < numHaps; ++i)
+    {
+        matHaplotypesProb0[i].resize(numSites);
+
+        // by default, use the numericals, starting from one
+        if (fNameInit == false)
+        {
+            string str = std::to_string(i + 1);
+            AddGenotypeName(str);
+
+            //cout << "Init name: " << str << endl;
         }
-        setSitesRemainNew.erase(sChose);
+    }
+}
 
-        pair<set<int>, set<int> > pp(setMaxCompatChosenNew, setSitesRemainNew);
+void ScistHaplotypeMat ::SetGenotypeAt(int sc, int site, int geno)
+{
+    matHaplotypes(sc, site) = geno;
+}
+
+void ScistHaplotypeMat ::AddGenotypeAt(int sc, int site, int geno)
+{
+    // merge geno into the stored genotype: only a stored 0 with geno == 1 is raised to 1; any other case is a no-op
+    int genoThis = GetGenotypeAt(sc, site);
+    if (genoThis == 0 && geno == 1)
+    {
+        SetGenotypeAt(sc, site, 1);
+    }
+}
 
-        listSetMaxCompatChosenNext.insert(pp);
-      }
+int ScistHaplotypeMat ::GetAltGenotypeAt(int sc, int site) const
+{
+    int genoThis = GetGenotypeAt(sc, site);
+    if (genoThis == 0)
+    {
+        return 1;
     }
+    else
+    {
+        return 0;
+    }
+}
 
-    //
-    if (listSetMaxCompatChosenNext.size() == 0) {
-      //
-      break;
-    } else {
-      listSetMaxCompatChosen = listSetMaxCompatChosenNext;
-    }
-  }
-
-  YW_ASSERT_INFO(listSetMaxCompatChosen.size() > 0, "Cannot be empty");
-  for (set<pair<set<int>, set<int> > >::iterator it =
-           listSetMaxCompatChosen.begin();
-       it != listSetMaxCompatChosen.end(); ++it) {
-    // cout << "Maximum clique found by the heuristic: ";
-    // DumpIntSet( it->first );
-    map<int, set<int> > mm;
-    for (set<int>::iterator it2 = it->first.begin(); it2 != it->first.end();
-         ++it2) {
-      set<int> ss;
-      GetMutRowsHapAtSite(*it2, ss);
-      mm[*it2] = ss;
-    }
-    listSetSitesCompat.push_back(mm);
-  }
-
-  //#endif
-
-#if 0
-    BinaryMatrix &matHaplotypesUse = const_cast<BinaryMatrix &>( this->matHaplotypes );
+double ScistHaplotypeMat ::GetGenotypeProbAllele0At(int sc, int site) const
+{
+    // return the stored probability of allele 0 for cell sc at the given site
+    return this->matHaplotypesProb0[sc][site];
+}
 
-    //
-    listSetSitesCompat.clear();
-    vector<vector<bool> > vecHapsFullyCompat( GetNumSites() );
-    for(int i=0; i<GetNumSites(); ++i)
+void ScistHaplotypeMat ::SetGenotypeProbAt(int sc, int site, double prob)
+{
+    this->matHaplotypesProb0[sc][site] = prob;
+}
+
+void ScistHaplotypeMat ::SetGenotypeProbOfGenoAt(int sc, int site, int geno, double prob)
+{
+    if (geno == 0)
     {
-        vecHapsFullyCompat[i].resize( GetNumSites() );
+        SetGenotypeProbAt(sc, site, prob);
     }
+    else
+    {
+        SetGenotypeProbAt(sc, site, 1.0 - prob);
+    }
+}
+
+int ScistHaplotypeMat ::GetGenotypeAt(int sc, int site) const
+{
+    return matHaplotypes(sc, site);
+}
+
+void ScistHaplotypeMat ::FindMaximalCompatSites(const std::vector<double> &wtSites, std::vector<std::map<int, std::set<int>>> &listSetSitesCompat, int maxNumSets, const std::set<std::pair<int, int>> *pSetCompatPairs) const
+{
 
-    for(int s1 = 0; s1<GetNumSites(); ++s1)
+    // we find the maximum weighted clique of compatible pairs
+    // construct compat pairs if not done yet
+    set<std::pair<int, int>> *pSetCompatPairsUse = const_cast<set<std::pair<int, int>> *>(pSetCompatPairs);
+    set<pair<int, int>> setCompatPairsAlt;
+    if (pSetCompatPairsUse == NULL)
     {
-        for(int s2=s1+1; s2<GetNumSites(); ++s2)
-        {
-            // root allele: 0
-            bool fCompat = matHaplotypesUse.IsCompatibleRooted(s1, s2, 0, 0);
-            vecHapsFullyCompat[s1][s2] = fCompat;
-            vecHapsFullyCompat[s2][s1] = fCompat;
-//cout << "Sites " << s1 << "," << s2 << ": ";
-//if(fCompat)
-//{
-//cout << " compatible\n";
-//}
-//else
-//{
-//cout << " not compatible\n";
-//}
-        }
+        ConsCompatMap(setCompatPairsAlt);
+        pSetCompatPairsUse = &setCompatPairsAlt;
     }
-    // find maximal compatible components
-    set< set<int> > setMaximalComps;
-    // start by putting all compatible pairs
-    for(int s1 = 0; s1<GetNumSites(); ++s1)
-    {
-        set<int> ss;
-        ss.insert(s1);
-        setMaximalComps.insert(ss);
-    }
-    // find larger
-    while(true)
-    {
-        // every time, make sure size is not too large
-        TrimCliquesMaxDiff( setMaximalComps, maxNumSets );
-//cout << "Size of current cliques to grow: " << setMaximalComps.size() << endl;
-//for( set<set<int> > :: iterator it = setMaximalComps.begin(); it != setMaximalComps.end(); ++it)
-//{
-//DumpIntSet(*it);
-//}
-
-        set< set<int> > setMaximalCompsNext;
-        // try to grow by adding one more
-        for( set<set<int> > :: iterator it = setMaximalComps.begin(); it != setMaximalComps.end(); ++it )
+
+    // implement the simple heuristics by Johnson 1974
+    //BinaryMatrix &matHaplotypesUse = const_cast<BinaryMatrix &>( this->matHaplotypes );
+
+    //
+    listSetSitesCompat.clear();
+
+    //
+    set<pair<set<int>, set<int>>> listSetMaxCompatChosen;
+    // init
+    set<int> ss;
+    set<int> setSitesRemainInit;
+    PopulateSetWithInterval(setSitesRemainInit, 0, GetNumSites() - 1);
+    pair<set<int>, set<int>> pp(ss, setSitesRemainInit);
+    listSetMaxCompatChosen.insert(pp);
+
+    while (true)
+    {
+        //
+        set<pair<set<int>, set<int>>> listSetMaxCompatChosenNext;
+
+        for (set<pair<set<int>, set<int>>>::iterator it = listSetMaxCompatChosen.begin(); it != listSetMaxCompatChosen.end(); ++it)
         {
-            for(int s=0; s<GetNumSites(); ++s)
+            set<int> setSitesRemain = it->second;
+
+            if (setSitesRemain.size() == 0)
             {
-                if(  it->find(s) == it->end() )
+                continue;
+            }
+
+            set<int> setMaxCompatChosen = it->first;
+
+            // find the one that is the most compatible with remaining sites
+            //int maxNumCompat = -1;
+            double wtSiteMax = -1.0 * HAP_MAX_INT;
+
+            vector<int> listSitesNext;
+            for (set<int>::iterator it = setSitesRemain.begin(); it != setSitesRemain.end(); ++it)
+            {
+                //    int numCompat = 0;
+                //    for(set<int> :: iterator it2 = setSitesRemain.begin(); it2 != setSitesRemain.end(); ++it2)
+                //    {
+                //        if( AreSitesCompatInMap(*pSetCompatPairsUse, *it,*it2) )
+                //        {
+                //            ++numCompat;
+                //        }
+                //    }
+                double wtCur = wtSites[*it];
+                //    if( numCompat > maxNumCompat )
+                if (wtCur > wtSiteMax)
                 {
-                    bool fCompat = true;
-                    for(set<int> :: iterator it2 = it->begin(); it2 != it->end(); ++it2 )
-                    {
-                        if( vecHapsFullyCompat[ s ][ *it2 ]  == false )
-                        {
-                            fCompat = false;
-                            break;
-                        }
-                    }
-                    if( fCompat )
+                    listSitesNext.clear();
+                    listSitesNext.push_back(*it);
+                    wtSiteMax = wtCur;
+                    //maxNumCompat = numCompat;
+                }
+                //    else if( numCompat == maxNumCompat )
+                if (wtCur == wtSiteMax)
+                {
+                    listSitesNext.push_back(*it);
+                }
+            }
+
+            for (int jj = 0; jj < (int)listSitesNext.size(); ++jj)
+            {
+                // don't continue adding if we are at the limit
+                if ((int)listSetMaxCompatChosenNext.size() > maxNumSets)
+                {
+                    continue;
+                }
+
+                int sChose = listSitesNext[jj];
+                set<int> setMaxCompatChosenNew = setMaxCompatChosen;
+                setMaxCompatChosenNew.insert(sChose);
+
+                // remove any sites that are incompatible with the chosen sites
+                set<int> setSitesRemainNew;
+                for (set<int>::iterator it = setSitesRemain.begin(); it != setSitesRemain.end(); ++it)
+                {
+                    if (AreSitesCompatInMap(*pSetCompatPairsUse, sChose, *it) == true)
                     {
-                        set<int> ss = *it;
-                        ss.insert( s );
-                        setMaximalCompsNext.insert(ss);
-//cout << "Growing a subset: ";
-//DumpIntSet(ss);
+                        setSitesRemainNew.insert(*it);
                     }
                 }
+                setSitesRemainNew.erase(sChose);
+
+                pair<set<int>, set<int>> pp(setMaxCompatChosenNew, setSitesRemainNew);
+
+                listSetMaxCompatChosenNext.insert(pp);
             }
         }
-        if( setMaximalCompsNext.size() == 0 )
+
+        //
+        if (listSetMaxCompatChosenNext.size() == 0)
         {
+            //
             break;
         }
         else
         {
-            setMaximalComps = setMaximalCompsNext;
+            listSetMaxCompatChosen = listSetMaxCompatChosenNext;
         }
     }
-    //
-    //TrimCliquesMaxDiff( setMaximalComps, maxNumSets );
 
-    YW_ASSERT_INFO( setMaximalComps.size() > 0, "Cannot be empty" );
-    for( set<set<int> > :: iterator it = setMaximalComps.begin(); it != setMaximalComps.end(); ++it )
+    YW_ASSERT_INFO(listSetMaxCompatChosen.size() > 0, "Cannot be empty");
+    for (set<pair<set<int>, set<int>>>::iterator it = listSetMaxCompatChosen.begin(); it != listSetMaxCompatChosen.end(); ++it)
     {
-cout << "Clique found: ";
-DumpIntSet(*it);
-        map<int, std::set<int> >  setSitesCompat;
-
-        set<int> ssChosen = *it;
-        for(set<int> :: iterator it = ssChosen.begin(); it != ssChosen.end(); ++it)
+        //cout << "Maximum clique found by the heuristic: ";
+        //DumpIntSet( it->first );
+        map<int, set<int>> mm;
+        for (set<int>::iterator it2 = it->first.begin(); it2 != it->first.end(); ++it2)
         {
             set<int> ss;
-            GetMutRowsHapAtSite(*it, ss);
-            setSitesCompat[*it] = ss;
+            GetMutRowsHapAtSite(*it2, ss);
+            mm[*it2] = ss;
         }
-        listSetSitesCompat.push_back(setSitesCompat);
+        listSetSitesCompat.push_back(mm);
     }
-#endif
 }
 
-int ScistHaplotypeMat ::GetNumSites() const {
-  return matHaplotypes.GetColNum();
+int ScistHaplotypeMat ::GetNumSites() const
+{
+    return matHaplotypes.GetColNum();
 }
 
-int ScistHaplotypeMat ::GetNumHaps() const { return matHaplotypes.GetRowNum(); }
+int ScistHaplotypeMat ::GetNumHaps() const
+{
+    return matHaplotypes.GetRowNum();
+}
 
-void ScistHaplotypeMat ::GetMutRowsHapAtSite(int site,
-                                             std::set<int> &setRows) const {
-  // any allele w/ non-zero is mutant
-  setRows.clear();
-  for (int r = 0; r < matHaplotypes.GetRowNum(); ++r) {
-    if (matHaplotypes(r, site) == 1) {
-      setRows.insert(r);
+void ScistHaplotypeMat ::GetMutRowsHapAtSite(int site, std::set<int> &setRows) const
+{
+    // any allele w/ non-zero is mutant
+    setRows.clear();
+    int rowNumber = matHaplotypes.GetRowNum();
+    for (int r = 0; r < rowNumber; ++r)
+    {
+        if (matHaplotypes(r, site) == 1)
+        {
+            setRows.insert(r);
+        }
     }
-  }
 }
 
-void ScistHaplotypeMat ::GetRowsWithGenoAtSite(int site, int geno,
-                                               std::set<int> &setRows) const {
-  setRows.clear();
-  if (geno == 1) {
-    GetMutRowsHapAtSite(site, setRows);
-  } else if (geno == 0) {
-    // get the complement
+void ScistHaplotypeMat ::GetRowsWithGenoAtSite(int site, int geno, std::set<int> &setRows) const
+{
     setRows.clear();
-    PopulateSetWithInterval(setRows, 0, GetNumHaps() - 1);
-    set<int> setRows1;
-    GetMutRowsHapAtSite(site, setRows1);
-    SubtractSets(setRows, setRows1);
-  }
-}
-
-double ScistHaplotypeMat ::GetScoreForGeno(int scIndex, int site,
-                                           int genotype) const {
-  int allele = this->matHaplotypes(scIndex, site);
-  if (allele == genotype) {
-    // when greeing, score is 0
-    return 0.0;
-  }
-
-  // for now, only use default scoring
-  double res = 0.0;
-  double prob0 = this->matHaplotypesProb0[scIndex][site];
-  double prob1 = 1.0 - prob0;
-  if (genotype == 1) {
-    // change from 0 to 1
-    if (prob1 <= 0.0) {
-      res = HAP_MAX_INT * 1.0;
-    } else {
-      res = log(prob0 / prob1);
-    }
-  } else {
-    if (prob0 <= 0.0) {
-      res = HAP_MAX_INT * 1.0;
-    } else {
-      res = log(prob1 / prob0);
-    }
-  }
-  if (res < 0.0) {
-    this->Dump();
-    cout << "cell: " << scIndex << ", site: " << site
-         << ", genotype: " << genotype << ", prob0: " << prob0 << endl;
-  }
-  YW_ASSERT_INFO(res >= 0.0, "Prob: wrong");
-  return res;
-}
-
-bool ScistHaplotypeMat ::IsNoninformative(int site) const {
-  //
-  BinaryMatrix &matHaplotypesUse =
-      const_cast<BinaryMatrix &>(this->matHaplotypes);
-  return matHaplotypesUse.IsColNonInformative(site);
-}
-
-bool ScistHaplotypeMat ::IsCompatible(int s1, int s2) const {
-  //
-  BinaryMatrix &matHaplotypesUse =
-      const_cast<BinaryMatrix &>(this->matHaplotypes);
-  return matHaplotypesUse.IsCompatible(s1, s2);
-}
-
-std::string ScistHaplotypeMat ::ConsTree() const {
-  //
-  // construct phylogeny
-  vector<int> rootZero;
-  for (int i = 0; i < GetNumSites(); ++i) {
-    rootZero.push_back(0);
-  }
-  PhylogenyTree phTree;
-  phTree.SetRoot(rootZero);
-  phTree.ConsOnBinMatrix(this->matHaplotypes);
-  phTree.RemoveDegreeTwoNodes();
-
-  // now assign leaf labels
-  map<string, string> mapIdToLabels;
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    // cout << "i: " << i << ", name: " << this->genosInput.GetGenotypeName(i)
-    // << endl;
-    string str = "(" + std::to_string(i) + ")";
-    mapIdToLabels[str] = GetGenotypeName(i);
-  }
-  phTree.ReassignLeafLabels(mapIdToLabels);
-
-  string res;
-  phTree.ConsNewickSorted(res);
-  // phTree.ConsNewick(res, false, 0.0, true);
-  return res;
-}
-
-double ScistHaplotypeMat ::SumLogProbs() const {
-  //
-  double res = 0.0;
-  for (int i = 0; i < (int)matHaplotypesProb0.size(); ++i) {
-    res += GetSumOfVecElements(matHaplotypesProb0[i]);
-  }
-  return res;
-}
-
-void ScistHaplotypeMat ::Dump() const {
-  ScistGenGenotypeMat ::Dump();
-
-  //
-  cout << "Matrix: [" << GetNumHaps() << "," << GetNumSites() << "]" << endl;
-  this->matHaplotypes.Dump();
-#if 0
-    cout << "Clusters\n";
-    for(int c=0; c<GetNumSites(); ++c)
-    {
-        cout << "Site " << c+1 << ": ";
-        set<int> rowsMut;
-        this->matHaplotypes.GetRowsWithAllele(c, 1, rowsMut);
-        DumpIntSet(rowsMut);
-    }
-#endif
-  cout << "Probabilities: \n";
-  for (int i = 0; i < (int)matHaplotypesProb0.size(); ++i) {
-    DumpDoubleVec(matHaplotypesProb0[i]);
-  }
-}
-
-void ScistHaplotypeMat ::OutputImput(const string *pStrDesc) const {
-  //
-  cout << "Lineages: ";
-  for (int i = 0; i < GetNumNames(); ++i) {
-    cout << GetGenotypeName(i) << "  ";
-  }
-  cout << endl;
-  if (pStrDesc != NULL) {
-    cout << *pStrDesc << endl;
-  } else {
-    cout << "Imputed genotypes: \n";
-  }
-  for (int s = 0; s < GetNumSites(); ++s) {
-    cout << "Site " << setw(6) << s + 1 << ":\t";
-
-    for (int i = 0; i < GetNumHaps(); ++i) {
-      cout << GetGenotypeAt(i, s) << " ";
+    if (geno == 1)
+    {
+        GetMutRowsHapAtSite(site, setRows);
+    }
+    else if (geno == 0)
+    {
+        // get the complement
+        setRows.clear();
+        PopulateSetWithInterval(setRows, 0, GetNumHaps() - 1);
+        set<int> setRows1;
+        GetMutRowsHapAtSite(site, setRows1);
+        SubtractSets(setRows, setRows1);
+    }
+}
+
+double ScistHaplotypeMat ::GetScoreForGeno(int scIndex, int site, int genotype) const
+{
+    int allele = this->matHaplotypes(scIndex, site);
+    if (allele == genotype)
+    {
+        // when the requested genotype agrees with the stored allele, the score is 0
+        return 0.0;
+    }
+
+    // for now, only use default scoring: log ratio of the stored probabilities, or HAP_MAX_INT if the requested genotype has prob <= 0
+    double res = 0.0;
+    double prob0 = this->matHaplotypesProb0[scIndex][site];
+    double prob1 = 1.0 - prob0;
+    if (genotype == 1)
+    {
+        // change from 0 to 1: score is log(prob0 / prob1)
+        if (prob1 <= 0.0)
+        {
+            res = HAP_MAX_INT * 1.0;
+        }
+        else
+        {
+            res = log(prob0 / prob1);
+        }
+    }
+    else
+    {
+        if (prob0 <= 0.0)
+        {
+            res = HAP_MAX_INT * 1.0;
+        }
+        else
+        {
+            res = log(prob1 / prob0);
+        }
+    }
+    if (res < 0.0)
+    {
+        this->Dump();
+        cout << "cell: " << scIndex << ", site: " << site << ", genotype: " << genotype << ", prob0: " << prob0 << endl;
+    }
+    YW_ASSERT_INFO(res >= 0.0, "Prob: wrong");
+    return res;
+}
+
+bool ScistHaplotypeMat ::IsNoninformative(int site) const
+{
+    // delegate to IsColNonInformative on the haplotype matrix; the const_cast presumably works around a non-const method (TODO confirm)
+    BinaryMatrix &matHaplotypesUse = const_cast<BinaryMatrix &>(this->matHaplotypes);
+    return matHaplotypesUse.IsColNonInformative(site);
+}
+
+bool ScistHaplotypeMat ::IsCompatible(int s1, int s2) const
+{
+    // delegate to IsCompatible on the haplotype matrix for sites s1, s2; the const_cast presumably works around a non-const method (TODO confirm)
+    BinaryMatrix &matHaplotypesUse = const_cast<BinaryMatrix &>(this->matHaplotypes);
+    return matHaplotypesUse.IsCompatible(s1, s2);
+}
+
+std::string ScistHaplotypeMat ::ConsTree() const
+{
+    // build a tree on the haplotype matrix and return the string produced by ConsNewickSorted
+    // construct phylogeny, using a root that is 0 at every site
+    std::vector<int> rootZero;
+    int numberSites = GetNumSites();
+    for (int i = 0; i < numberSites; ++i)
+    {
+        rootZero.push_back(0);
+    }
+    PhylogenyTree phTree;
+    phTree.SetRoot(rootZero);
+    phTree.ConsOnBinMatrix(this->matHaplotypes);
+    phTree.RemoveDegreeTwoNodes();
+
+    // now assign leaf labels: the id "(i)" of row i is mapped to the genotype name of row i
+    std::map<string, string> mapIdToLabels;
+    int numberHaps = GetNumHaps();
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        //cout << "i: " << i << ", name: " << this->genosInput.GetGenotypeName(i) << endl;
+        string str = "(" + std::to_string(i) + ")";
+        mapIdToLabels[str] = GetGenotypeName(i);
+    }
+    phTree.ReassignLeafLabels(mapIdToLabels);
+
+    std::string res;
+    phTree.ConsNewickSorted(res);
+    //phTree.ConsNewick(res, false, 0.0, true);
+    return res;
+}
+
+double ScistHaplotypeMat ::SumLogProbs() const
+{
+    // accumulate GetSumOfVecElements over every row of matHaplotypesProb0 (NOTE: no log is applied in this function)
+    double res = 0.0;
+    for (int i = 0; i < (int)matHaplotypesProb0.size(); ++i)
+    {
+        res += GetSumOfVecElements(matHaplotypesProb0[i]);
+    }
+    return res;
+}
+
+void ScistHaplotypeMat ::Dump() const
+{
+    ScistGenGenotypeMat ::Dump();
+
+    // after the base-class dump: print the matrix dimensions, dump the binary matrix, then dump each row of matHaplotypesProb0
+    int numberHaps = GetNumHaps();
+    int numberSites = GetNumSites();
+    cout << "Matrix: [" << numberHaps << "," << numberSites << "]" << endl;
+    this->matHaplotypes.Dump();
+
+    cout << "Probabilities: \n";
+    for (int i = 0; i < (int)matHaplotypesProb0.size(); ++i)
+    {
+        DumpDoubleVec(matHaplotypesProb0[i]);
+    }
+}
+
+void ScistHaplotypeMat ::OutputImput(const string *pStrDesc) const
+{
+    //
+    cout << "Lineages: ";
+    int numberNames = GetNumNames();
+
+    for (int i = 0; i < numberNames; ++i)
+    {
+        cout << GetGenotypeName(i) << "  ";
     }
     cout << endl;
-  }
+
+    if (pStrDesc != NULL)
+    {
+        cout << *pStrDesc << endl;
+    }
+    else
+    {
+        cout << "Imputed genotypes: \n";
+    }
+
+    int numberSites = GetNumSites();
+    int numberHaps = GetNumHaps();
+    for (int s = 0; s < numberSites; ++s)
+    {
+        cout << "Site " << setw(6) << s + 1 << ":\t";
+
+        for (int i = 0; i < numberHaps; ++i)
+        {
+            cout << GetGenotypeAt(i, s) << " ";
+        }
+        cout << endl;
+    }
 }
 
-bool ScistHaplotypeMat ::IsProbSignificant(double prob, double thresVal) const {
-  //
-  const double probConst = 0.5;
-  if (prob < probConst && prob > (probConst - thresVal / 2)) {
-    return false;
-  }
-  if (prob > probConst && prob < (probConst + thresVal / 2)) {
-    return false;
-  }
-  return true;
+bool ScistHaplotypeMat ::IsProbSignificant(double prob, double thresVal) const
+{
+    //
+    const double probConst = 0.5;
+    if (prob < probConst && prob > (probConst - thresVal / 2))
+    {
+        return false;
+    }
+    if (prob > probConst && prob < (probConst + thresVal / 2))
+    {
+        return false;
+    }
+    return true;
 }
 
 // *************************************************************************************
 // genotypes: ternary matrix
 
-ScistTernaryMat ::ScistTernaryMat() {}
+ScistTernaryMat ::ScistTernaryMat()
+{
+}
 
-ScistGenGenotypeMat *ScistTernaryMat ::Copy() const {
-  //
-  ScistTernaryMat *pMatCopy = new ScistTernaryMat();
+ScistGenGenotypeMat *ScistTernaryMat ::Copy() const
+{
+    //
+    ScistTernaryMat *pMatCopy = new ScistTernaryMat();
 
-  for (int i = 0; i < GetNumNames(); ++i) {
-    pMatCopy->AddGenotypeName(GetGenotypeName(i));
-  }
+    int numberNames = GetNumNames();
+    for (int i = 0; i < numberNames; ++i)
+    {
+        pMatCopy->AddGenotypeName(GetGenotypeName(i));
+    }
 
-  pMatCopy->SetSize(GetNumHaps(), GetNumSites());
+    int numberHaps = GetNumHaps();
+    pMatCopy->SetSize(numberHaps, GetNumSites());
 
-  //
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    for (int j = 0; j < GetNumSites(); ++j) {
-      pMatCopy->SetGenotypeAt(i, j, GetGenotypeAt(i, j));
-      pMatCopy->SetGenotypeProbOfGenoAt(i, j, 0, GetGenotypeProbAt(i, j, 0));
-      pMatCopy->SetGenotypeProbOfGenoAt(i, j, 1, GetGenotypeProbAt(i, j, 1));
+    //
+    int numberSites = GetNumSites();
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        for (int j = 0; j < numberSites; ++j)
+        {
+            pMatCopy->SetGenotypeAt(i, j, GetGenotypeAt(i, j));
+            pMatCopy->SetGenotypeProbOfGenoAt(i, j, 0, GetGenotypeProbAt(i, j, 0));
+            pMatCopy->SetGenotypeProbOfGenoAt(i, j, 1, GetGenotypeProbAt(i, j, 1));
+        }
     }
-  }
 
-  return pMatCopy;
+    return pMatCopy;
 }
 
-bool ScistTernaryMat ::ReadFromFile(std::ifstream &infile, int numSites,
-                                    int numSCs, bool fSiteName) {
-  //
-  // assume each site is independent
-  SetSize(numSCs, numSites);
-  for (int i = 0; i < numSites; ++i) {
-    string strName;
-    if (fSiteName) {
-      infile >> strName;
-    } else {
-      strName = std::to_string(i + 1);
-    }
-    AddSiteName(strName);
+bool ScistTernaryMat ::ReadFromFile(std::ifstream &infile, int numSites, int numSCs, bool fSiteName)
+{
+    //
+    // assume each site is independent
+    SetSize(numSCs, numSites);
+    for (int i = 0; i < numSites; ++i)
+    {
+        string strName;
+        if (fSiteName)
+        {
+            infile >> strName;
+        }
+        else
+        {
+            strName = std::to_string(i + 1);
+        }
+        AddSiteName(strName);
 
-    // cout << "Read in site: " << i << endl;
-    for (int j = 0; j < numSCs; ++j) {
-      double prob0 = 0.0, prob1 = 0.0;
-      bool res = ReadFromFileTernaryProb(infile, prob0, prob1);
-      if (res == false) {
-        return false;
-      }
+        //cout << "Read in site: " << i << endl;
+        for (int j = 0; j < numSCs; ++j)
+        {
+            double prob0 = 0.0, prob1 = 0.0;
+            bool res = ReadFromFileTernaryProb(infile, prob0, prob1);
+            if (res == false)
+            {
+                return false;
+            }
 
-      SetGenotypeProbOfGenoAt(j, i, 0, prob0);
-      SetGenotypeProbOfGenoAt(j, i, 1, prob1);
+            SetGenotypeProbOfGenoAt(j, i, 0, prob0);
+            SetGenotypeProbOfGenoAt(j, i, 1, prob1);
 
-      // choose the allele w/ higher prob
-      int allele = 0;
-      double probMax = GetGenotypeProbAt(j, i, 0);
-      if (probMax < GetGenotypeProbAt(j, i, 1)) {
-        probMax = GetGenotypeProbAt(j, i, 1);
-        allele = 1;
-      }
-      if (probMax < GetGenotypeProbAt(j, i, 2)) {
-        probMax = GetGenotypeProbAt(j, i, 2);
-        allele = 2;
-      }
-      SetGenotypeAt(j, i, allele);
+            // choose the allele w/ higher prob
+            int allele = 0;
+            double probMax = GetGenotypeProbAt(j, i, 0);
+            if (probMax < GetGenotypeProbAt(j, i, 1))
+            {
+                probMax = GetGenotypeProbAt(j, i, 1);
+                allele = 1;
+            }
+            if (probMax < GetGenotypeProbAt(j, i, 2))
+            {
+                probMax = GetGenotypeProbAt(j, i, 2);
+                allele = 2;
+            }
+            SetGenotypeAt(j, i, allele);
+        }
     }
-  }
 
-  cout << "Input matrix: ";
-  this->matTernary.Dump();
+    cout << "Input matrix: ";
+    this->matTernary.Dump();
 
-  return true;
+    return true;
 }
 
-bool ScistTernaryMat ::ReadFromFileTernaryProb(std::ifstream &infile,
-                                               double &prob0, double &prob1) {
-  // read in the prob of allele: (0.6,0.1) 0.6 means prob of 0 is 0.6 and prob
-  // of 1 is 0.1 assume prob of 0 + 1 + 2 = 1
-  infile >> prob0 >> prob1;
-  return true;
+bool ScistTernaryMat ::ReadFromFileTernaryProb(std::ifstream &infile, double &prob0, double &prob1)
+{
+    // read in the prob of allele: (0.6,0.1) 0.6 means prob of 0 is 0.6 and prob of 1 is 0.1
+    // assume prob of 0 + 1 + 2 = 1
+    infile >> prob0 >> prob1;
+    return true;
 }
 
-void ScistTernaryMat ::SetSize(int numSCs, int numSites) {
-  matTernary.SetSize(numSCs, numSites);
+void ScistTernaryMat ::SetSize(int numSCs, int numSites)
+{
+    matTernary.SetSize(numSCs, numSites);
 
-  matTernaryProbs.clear();
-  matTernaryProbs.resize(numSCs);
+    matTernaryProbs.clear();
+    matTernaryProbs.resize(numSCs);
 
-  bool fNameInit = GetNumNames() > 0;
+    bool fNameInit = GetNumNames() > 0;
 
-  for (int i = 0; i < numSCs; ++i) {
-    matTernaryProbs[i].resize(numSites);
-    for (int s = 0; s < numSites; ++s) {
-      SetGenotypeProbOfGenoAt(i, s, 0, 1.0);
-      SetGenotypeProbOfGenoAt(i, s, 1, 0.0);
-    }
+    for (int i = 0; i < numSCs; ++i)
+    {
+        matTernaryProbs[i].resize(numSites);
+        for (int s = 0; s < numSites; ++s)
+        {
+            SetGenotypeProbOfGenoAt(i, s, 0, 1.0);
+            SetGenotypeProbOfGenoAt(i, s, 1, 0.0);
+        }
 
-    // by default, use the numericals, starting from one
-    if (fNameInit == false) {
-      string str = std::to_string(i + 1);
-      AddGenotypeName(str);
-      // cout << "Init name: " << str << endl;
+        // by default, use the numericals, starting from one
+        if (fNameInit == false)
+        {
+            string str = std::to_string(i + 1);
+            AddGenotypeName(str);
+            //cout << "Init name: " << str << endl;
+        }
     }
-  }
 }
 
-int ScistTernaryMat ::GetGenotypeAt(int sc, int site) const {
-  return matTernary(sc, site);
+int ScistTernaryMat ::GetGenotypeAt(int sc, int site) const
+{
+    return matTernary(sc, site);
 }
 
-int ScistTernaryMat ::GetAltGenotypeAt(int sc, int site) const {
-  YW_ASSERT_INFO(false, "Not supported1");
-  return 1;
+int ScistTernaryMat ::GetAltGenotypeAt(int sc, int site) const
+{
+    YW_ASSERT_INFO(false, "Not supported1");
+    return 1;
 }
 
-void ScistTernaryMat ::SetGenotypeAt(int sc, int site, int geno) {
-  matTernary(sc, site) = geno;
+void ScistTernaryMat ::SetGenotypeAt(int sc, int site, int geno)
+{
+    matTernary(sc, site) = geno;
 }
 
-void ScistTernaryMat ::AddGenotypeAt(int sc, int site, int geno) {
-  // append the genotype into it
-  int genoThis = GetGenotypeAt(sc, site);
-  if (genoThis != geno) {
-    SetGenotypeAt(sc, site, geno);
-  }
+void ScistTernaryMat ::AddGenotypeAt(int sc, int site, int geno)
+{
+    // append the genotype into it
+    int genoThis = GetGenotypeAt(sc, site);
+    if (genoThis != geno)
+    {
+        SetGenotypeAt(sc, site, geno);
+    }
 }
 
-double ScistTernaryMat ::GetGenotypeProbAllele0At(int sc, int site) const {
-  return GetGenotypeProbAt(sc, site, 0);
+double ScistTernaryMat ::GetGenotypeProbAllele0At(int sc, int site) const
+{
+    return GetGenotypeProbAt(sc, site, 0);
 }
 
-double ScistTernaryMat ::GetGenotypeProbAt(int sc, int site, int geno) const {
-  if (geno == 0) {
-    return this->matTernaryProbs[sc][site].first;
-  } else if (geno == 1) {
-    return this->matTernaryProbs[sc][site].second;
-  } else {
-    return 1.0 - GetGenotypeProbAt(sc, site, 0) -
-           GetGenotypeProbAt(sc, site, 1);
-  }
+double ScistTernaryMat ::GetGenotypeProbAt(int sc, int site, int geno) const
+{
+    if (geno == 0)
+    {
+        return this->matTernaryProbs[sc][site].first;
+    }
+    else if (geno == 1)
+    {
+        return this->matTernaryProbs[sc][site].second;
+    }
+    else
+    {
+        return 1.0 - GetGenotypeProbAt(sc, site, 0) - GetGenotypeProbAt(sc, site, 1);
+    }
 }
 
-void ScistTernaryMat ::SetGenotypeProbAt(int sc, int site, double prob) {
-  YW_ASSERT_INFO(false, "Not impelemented");
+void ScistTernaryMat ::SetGenotypeProbAt(int sc, int site, double prob)
+{
+    YW_ASSERT_INFO(false, "Not impelemented");
 }
 
-void ScistTernaryMat ::SetGenotypeProbOfGenoAt(int sc, int site, int geno,
-                                               double prob) {
-  if (geno == 0) {
-    matTernaryProbs[sc][site].first = prob;
-  } else if (geno == 1) {
-    matTernaryProbs[sc][site].second = prob;
-  } else {
-    YW_ASSERT_INFO(false, "Cannot only set the homozygous mutant probility");
-  }
+void ScistTernaryMat ::SetGenotypeProbOfGenoAt(int sc, int site, int geno, double prob)
+{
+    if (geno == 0)
+    {
+        matTernaryProbs[sc][site].first = prob;
+    }
+    else if (geno == 1)
+    {
+        matTernaryProbs[sc][site].second = prob;
+    }
+    else
+    {
+        YW_ASSERT_INFO(false, "Cannot only set the homozygous mutant probility");
+    }
 }
 
-void ScistTernaryMat ::FindMaximalCompatSites(
-    const std::vector<double> &wtSites,
-    std::vector<std::map<int, std::set<int> > > &listSetSitesCompat,
-    int maxNumSets,
-    const std::set<std::pair<int, int> > *pSetCompatPairs) const {
-  YW_ASSERT_INFO(false, "Not implemented");
+void ScistTernaryMat ::FindMaximalCompatSites(const std::vector<double> &wtSites, std::vector<std::map<int, std::set<int>>> &listSetSitesCompat, int maxNumSets, const std::set<std::pair<int, int>> *pSetCompatPairs) const
+{
+    YW_ASSERT_INFO(false, "Not implemented");
 }
 
-int ScistTernaryMat ::GetNumSites() const { return matTernary.GetColNum(); }
+int ScistTernaryMat ::GetNumSites() const
+{
+    return matTernary.GetColNum();
+}
 
-int ScistTernaryMat ::GetNumHaps() const { return matTernary.GetRowNum(); }
+int ScistTernaryMat ::GetNumHaps() const
+{
+    return matTernary.GetRowNum();
+}
 
-void ScistTernaryMat ::GetMutRowsHapAtSite(int site,
-                                           std::set<int> &setRows) const {
-  // YW_ASSERT_INFO(false, "Not supported2");
-  // for now, use both 1/2 rows
-  GetRowsWithGenoAtSite(site, 1, setRows);
-  set<int> setRows2;
-  GetRowsWithGenoAtSite(site, 2, setRows2);
-  UnionSets(setRows, setRows2);
+void ScistTernaryMat ::GetMutRowsHapAtSite(int site, std::set<int> &setRows) const
+{
+    //YW_ASSERT_INFO(false, "Not supported2");
+    // for now, use both 1/2 rows
+    GetRowsWithGenoAtSite(site, 1, setRows);
+    set<int> setRows2;
+    GetRowsWithGenoAtSite(site, 2, setRows2);
+    UnionSets(setRows, setRows2);
 }
 
-void ScistTernaryMat ::GetRowsWithGenoAtSite(int site, int geno,
-                                             std::set<int> &setRows) const {
-  setRows.clear();
-  for (int h = 0; h < GetNumHaps(); ++h) {
-    if (GetGenotypeAt(h, site) == geno) {
-      setRows.insert(h);
+void ScistTernaryMat ::GetRowsWithGenoAtSite(int site, int geno, std::set<int> &setRows) const
+{
+    setRows.clear();
+    for (int h = 0; h < GetNumHaps(); ++h)
+    {
+        if (GetGenotypeAt(h, site) == geno)
+        {
+            setRows.insert(h);
+        }
     }
-  }
 }
 
-double ScistTernaryMat ::GetScoreForGeno(int scIndex, int site,
-                                         int genotype) const {
-  YW_ASSERT_INFO(false, "Not supported3");
-  return 0.0;
+double ScistTernaryMat ::GetScoreForGeno(int scIndex, int site, int genotype) const
+{
+    YW_ASSERT_INFO(false, "Not supported3");
+    return 0.0;
 }
 
-bool ScistTernaryMat ::IsNoninformative(int site) const {
-  YW_ASSERT_INFO(false, "Not supported4");
-  return false;
+bool ScistTernaryMat ::IsNoninformative(int site) const
+{
+    YW_ASSERT_INFO(false, "Not supported4");
+    return false;
 }
 
-bool ScistTernaryMat ::IsCompatible(int s1, int s2) const {
-  YW_ASSERT_INFO(false, "Not supported5");
-  return false;
+bool ScistTernaryMat ::IsCompatible(int s1, int s2) const
+{
+    YW_ASSERT_INFO(false, "Not supported5");
+    return false;
 }
 
-std::string ScistTernaryMat ::ConsTree() const {
-  // construct phylogeny
-  vector<int> rootZero;
-  for (int i = 0; i < GetNumSites(); ++i) {
-    rootZero.push_back(0);
-  }
+std::string ScistTernaryMat ::ConsTree() const
+{
+    // construct phylogeny
+    std::vector<int> rootZero;
+    int numberSites = GetNumSites();
+    for (int i = 0; i < numberSites; ++i)
+    {
+        rootZero.push_back(0);
+    }
 
-  // construct binary matrix for distance computation
-  BinaryMatrix binMat;
-  ConsHapMatForDistCalc(binMat);
+    // construct binary matrix for distance computation
+    BinaryMatrix binMat;
+    ConsHapMatForDistCalc(binMat);
 
-  PhylogenyTree phTree;
-  phTree.SetRoot(rootZero);
-  phTree.ConsOnBinMatrix(binMat);
-  phTree.RemoveDegreeTwoNodes();
+    PhylogenyTree phTree;
+    phTree.SetRoot(rootZero);
+    phTree.ConsOnBinMatrix(binMat);
+    phTree.RemoveDegreeTwoNodes();
 
-  // now assign leaf labels
-  map<string, string> mapIdToLabels;
-  for (int i = 0; i < GetNumHaps(); ++i) {
-    // cout << "i: " << i << ", name: " << this->genosInput.GetGenotypeName(i)
-    // << endl;
-    string str = "(" + std::to_string(i) + ")";
-    mapIdToLabels[str] = GetGenotypeName(i);
-  }
-  phTree.ReassignLeafLabels(mapIdToLabels);
+    // now assign leaf labels
+    std::map<string, string> mapIdToLabels;
+    int numberHaps = GetNumHaps();
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        //cout << "i: " << i << ", name: " << this->genosInput.GetGenotypeName(i) << endl;
+        string str = "(" + std::to_string(i) + ")";
+        mapIdToLabels[str] = GetGenotypeName(i);
+    }
+    phTree.ReassignLeafLabels(mapIdToLabels);
 
-  string res;
-  phTree.ConsNewickSorted(res);
-  // phTree.ConsNewick(res, false, 0.0, true);
-  return res;
+    string res;
+    phTree.ConsNewickSorted(res);
+    //phTree.ConsNewick(res, false, 0.0, true);
+    return res;
 }
 
-double ScistTernaryMat ::SumLogProbs() const {
-  YW_ASSERT_INFO(false, "Not impelemtned");
-  return 0.0;
+double ScistTernaryMat ::SumLogProbs() const
+{
+    YW_ASSERT_INFO(false, "Not impelemtned");
+    return 0.0;
 }
 
-void ScistTernaryMat ::Dump() const {
-  ScistGenGenotypeMat::Dump();
+void ScistTernaryMat ::Dump() const
+{
+    ScistGenGenotypeMat::Dump();
+
+    //
+    int numberHaps = GetNumHaps();
+    int numberSites = GetNumSites();
+    cout << "Matrix: [" << numberHaps << "," << numberSites << "]" << endl;
+    this->matTernary.Dump();
 
-  //
-  cout << "Matrix: [" << GetNumHaps() << "," << GetNumSites() << "]" << endl;
-  this->matTernary.Dump();
+    cout << "Probabilities: \n";
+    for (int i = 0; i < (int)matTernaryProbs.size(); ++i)
+    {
+        for (int j = 0; j < (int)matTernaryProbs[i].size(); ++j)
+        {
+            cout << "(" << matTernaryProbs[i][j].first << "," << matTernaryProbs[i][j].second << ")  ";
+        }
+        cout << endl;
+    }
+}
 
-  cout << "Probabilities: \n";
-  for (int i = 0; i < (int)matTernaryProbs.size(); ++i) {
-    for (int j = 0; j < (int)matTernaryProbs[i].size(); ++j) {
-      cout << "(" << matTernaryProbs[i][j].first << ","
-           << matTernaryProbs[i][j].second << ")  ";
+void ScistTernaryMat ::OutputImput(const string *pStrDesc) const
+{
+    //
+    int numberHaps = GetNumHaps();
+    int numberNames = GetNumNames();
+    int numberSites = GetNumSites();
+    cout << "Lineages: ";
+    for (int i = 0; i < numberNames; ++i)
+    {
+        cout << GetGenotypeName(i) << "  ";
     }
     cout << endl;
-  }
-}
-
-void ScistTernaryMat ::OutputImput(const string *pStrDesc) const {
-  //
-  cout << "Lineages: ";
-  for (int i = 0; i < GetNumNames(); ++i) {
-    cout << GetGenotypeName(i) << "  ";
-  }
-  cout << endl;
-  if (pStrDesc != NULL) {
-    cout << *pStrDesc << endl;
-  } else {
-    cout << "Imputed genotypes: \n";
-  }
-  for (int s = 0; s < GetNumSites(); ++s) {
-    cout << "Site " << setw(6) << s + 1 << ":\t";
-
-    for (int i = 0; i < GetNumHaps(); ++i) {
-      cout << GetGenotypeAt(i, s) << " ";
+    if (pStrDesc != NULL)
+    {
+        cout << *pStrDesc << endl;
     }
-    cout << endl;
-  }
-}
-
-void ScistTernaryMat ::ConsHapMatForDistCalc(
-    BinaryMatrix &matHaplotypes) const {
-  matHaplotypes.SetSize(GetNumHaps(), 2 * GetNumSites());
-  for (int r = 0; r < GetNumHaps(); ++r) {
-    for (int s = 0; s < GetNumSites(); ++s) {
-      int geno = GetGenotypeAt(r, s);
-      int allele0 = 0, allele1 = 0;
-      if (geno != 0) {
-        allele0 = 1;
-      }
-      if (geno == 2) {
-        allele1 = 1;
-      }
-      matHaplotypes(r, 2 * s) = allele0;
-      matHaplotypes(r, 2 * s + 1) = allele1;
-    }
-  }
-}
-
-bool ScistTernaryMat ::IsProbSignificant(double prob, double thresVal) const {
-  //
-  const double probConst = 0.3333333;
-  if (prob < probConst && prob > (probConst - thresVal / 2)) {
-    return false;
-  }
-  if (prob > probConst && prob < (probConst + thresVal / 2)) {
-    return false;
-  }
-  return true;
+    else
+    {
+        cout << "Imputed genotypes: \n";
+    }
+    for (int s = 0; s < numberSites; ++s)
+    {
+        cout << "Site " << setw(6) << s + 1 << ":\t";
+
+        for (int i = 0; i < numberHaps; ++i)
+        {
+            cout << GetGenotypeAt(i, s) << " ";
+        }
+        cout << endl;
+    }
+}
+
+void ScistTernaryMat ::ConsHapMatForDistCalc(BinaryMatrix &matHaplotypes) const
+{
+    int numberHaps = GetNumHaps();
+    matHaplotypes.SetSize(numberHaps, 2 * GetNumSites());
+    for (int r = 0; r < numberHaps; ++r)
+    {
+        for (int s = 0; s < GetNumSites(); ++s)
+        {
+            int geno = GetGenotypeAt(r, s);
+            int allele0 = 0, allele1 = 0;
+            if (geno != 0)
+            {
+                allele0 = 1;
+            }
+            if (geno == 2)
+            {
+                allele1 = 1;
+            }
+            matHaplotypes(r, 2 * s) = allele0;
+            matHaplotypes(r, 2 * s + 1) = allele1;
+        }
+    }
+}
+
+bool ScistTernaryMat ::IsProbSignificant(double prob, double thresVal) const
+{
+    //
+    const double probConst = 0.3333333;
+    if (prob < probConst && prob > (probConst - thresVal / 2))
+    {
+        return false;
+    }
+    if (prob > probConst && prob < (probConst + thresVal / 2))
+    {
+        return false;
+    }
+    return true;
 }
diff --git a/trisicell/external/scistree/ScistGenotype.hpp b/trisicell/external/scistree/ScistGenotype.hpp
index 7f04f1c..8f30136 100644
--- a/trisicell/external/scistree/ScistGenotype.hpp
+++ b/trisicell/external/scistree/ScistGenotype.hpp
@@ -9,8 +9,8 @@
 #ifndef ScistGenotype_hpp
 #define ScistGenotype_hpp
 
-#include <map>
 #include <set>
+#include <map>
 #include <string>
 
 #include "BinaryMatrix.h"
@@ -19,203 +19,164 @@
 // *************************************************************************************
 // genotypes: integer matrix
 
-class ScistGenGenotypeMat {
+class ScistGenGenotypeMat
+{
 public:
-  ScistGenGenotypeMat();
-  virtual ~ScistGenGenotypeMat() {}
-  virtual ScistGenGenotypeMat *CreateNewMat() const = 0;
-  virtual ScistGenGenotypeMat *Copy() const = 0;
-  virtual bool ReadFromFile(std::ifstream &infile, int numSites, int numSCs,
-                            bool fSiteName) = 0;
-  virtual void SetSize(int numSCs, int numSites) = 0;
-  virtual void AddGenotypeName(const std::string &strNameIn) {
-    listNames.push_back(strNameIn);
-  }
-  virtual void SetGenotypeName(int i, const std::string &strNameIn) {
-    listNames[i] = strNameIn;
-  }
-  virtual std::string GetGenotypeName(int i) const { return listNames[i]; }
-  virtual void AddSiteName(const std::string &strNameIn) {
-    listSiteNames.push_back(strNameIn);
-  }
-  virtual std::string GetSiteName(int i) const { return listSiteNames[i]; }
-  virtual void GetSiteNamesAll(std::vector<std::string> &listSiteNamesOut) {
-    listSiteNamesOut = listSiteNames;
-  }
-  virtual int GetGenotypeAt(int sc, int site) const = 0;
-  virtual int GetAltGenotypeAt(int sc, int site) const = 0;
-  virtual void SetGenotypeAt(int sc, int site, int geno) = 0;
-  virtual void AddGenotypeAt(int sc, int site, int geno) = 0;
-  virtual double GetGenotypeProbAllele0At(int sc, int site) const = 0;
-  virtual double GetGenotypeProbAt(int sc, int site, int geno) const = 0;
-  virtual void SetGenotypeProbAt(int sc, int site, double prob) = 0;
-  virtual void SetGenotypeProbOfGenoAt(int sc, int site, int geno,
-                                       double prob) = 0;
-  virtual bool IsBinary() const = 0;
-  virtual void FindMaximalCompatSites(
-      const std::vector<double> &wtSites,
-      std::vector<std::map<int, std::set<int> > > &listSetSitesCompat,
-      int maxNumSets,
-      const std::set<std::pair<int, int> > *pSetCompatPairs = NULL) const = 0;
-  virtual int GetNumSites() const = 0;
-  virtual int GetNumHaps() const = 0;
-  virtual void GetMutRowsHapAtSite(int site, std::set<int> &setRows) const = 0;
-  virtual void GetRowsWithGenoAtSite(int site, int geno,
-                                     std::set<int> &setRows) const = 0;
-  virtual double GetScoreForGeno(int scIndex, int site, int genotype) const = 0;
-  virtual bool IsNoninformative(int site) const = 0;
-  virtual bool IsCompatible(int s1, int s2) const = 0;
-  virtual ScistGenGenotypeMat *SubMatrix(const std::set<int> &setRows,
-                                         const std::set<int> &setSites) const;
-  virtual void Dump() const;
-  virtual void OutputImput(const string *pStrDesc = NULL) const = 0;
-  virtual std::string ConsTree() const = 0;
-  virtual double SumLogProbs() const = 0;
-  virtual void GetColMultiplicityMap(std::vector<int> &listColMulti) const = 0;
-  virtual bool IsProbSignificant(double prob, double thresVal) const = 0;
-  std::string ConsNJTree() const;
-  std::string ConsNJTreeZeroRoot() const;
-  std::string ConsNJTreeNoInc() const;
-  std::string GetFileName() const { return inputFileName; }
-  double IsProbAtCellPosSignificant(int sc, int site, double thresVal) const {
-    return IsProbSignificant(GetGenotypeProbAt(sc, site, 0), thresVal);
-  }
-  void SetSignificantThres(double thres) { thresSignifcant = thres; }
-  void SetFileName(std::string &fn) { inputFileName = fn; }
-  double CalcHammingDistBetwHaps(int h1, int h2) const;
-  void ConsCompatMap(std::set<std::pair<int, int> > &setCompatPairs) const;
-  int GetGenotypeNumOf(int geno) const;
-  int FindCellByName(const std::string &strName) const;
-  void ChangeGenosAtPositions(
-      const std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces);
-  static bool
-  AreSitesCompatInMap(const std::set<std::pair<int, int> > &setCompatPairs,
-                      int s1, int s2);
+    ScistGenGenotypeMat();
+    virtual ~ScistGenGenotypeMat() {}
+    virtual ScistGenGenotypeMat *CreateNewMat() const = 0;
+    virtual ScistGenGenotypeMat *Copy() const = 0;
+    virtual bool ReadFromFile(std::ifstream &infile, int numSites, int numSCs, bool fSiteName) = 0;
+    virtual void SetSize(int numSCs, int numSites) = 0;
+    virtual void AddGenotypeName(const std::string &strNameIn) { listNames.push_back(strNameIn); }
+    virtual void SetGenotypeName(int i, const std::string &strNameIn) { listNames[i] = strNameIn; }
+    virtual std::string GetGenotypeName(int i) const { return listNames[i]; }
+    virtual void AddSiteName(const std::string &strNameIn) { listSiteNames.push_back(strNameIn); }
+    virtual std::string GetSiteName(int i) const { return listSiteNames[i]; }
+    virtual void GetSiteNamesAll(std::vector<std::string> &listSiteNamesOut) { listSiteNamesOut = listSiteNames; }
+    virtual int GetGenotypeAt(int sc, int site) const = 0;
+    virtual int GetAltGenotypeAt(int sc, int site) const = 0;
+    virtual void SetGenotypeAt(int sc, int site, int geno) = 0;
+    virtual void AddGenotypeAt(int sc, int site, int geno) = 0;
+    virtual double GetGenotypeProbAllele0At(int sc, int site) const = 0;
+    virtual double GetGenotypeProbAt(int sc, int site, int geno) const = 0;
+    virtual void SetGenotypeProbAt(int sc, int site, double prob) = 0;
+    virtual void SetGenotypeProbOfGenoAt(int sc, int site, int geno, double prob) = 0;
+    virtual bool IsBinary() const = 0;
+    virtual void FindMaximalCompatSites(const std::vector<double> &wtSites, std::vector<std::map<int, std::set<int>>> &listSetSitesCompat, int maxNumSets, const std::set<std::pair<int, int>> *pSetCompatPairs = NULL) const = 0;
+    virtual int GetNumSites() const = 0;
+    virtual int GetNumHaps() const = 0;
+    virtual void GetMutRowsHapAtSite(int site, std::set<int> &setRows) const = 0;
+    virtual void GetRowsWithGenoAtSite(int site, int geno, std::set<int> &setRows) const = 0;
+    virtual double GetScoreForGeno(int scIndex, int site, int genotype) const = 0;
+    virtual bool IsNoninformative(int site) const = 0;
+    virtual bool IsCompatible(int s1, int s2) const = 0;
+    virtual ScistGenGenotypeMat *SubMatrix(const std::set<int> &setRows, const std::set<int> &setSites) const;
+    virtual void Dump() const;
+    virtual void OutputImput(const string *pStrDesc = NULL) const = 0;
+    virtual std::string ConsTree() const = 0;
+    virtual double SumLogProbs() const = 0;
+    virtual void GetColMultiplicityMap(std::vector<int> &listColMulti) const = 0;
+    virtual bool IsProbSignificant(double prob, double thresVal) const = 0;
+    std::string ConsNJTree() const;
+    std::string ConsNJTreeZeroRoot() const;
+    std::string ConsNJTreeNoInc() const;
+    std::string GetFileName() const { return inputFileName; }
+    double IsProbAtCellPosSignificant(int sc, int site, double thresVal) const { return IsProbSignificant(GetGenotypeProbAt(sc, site, 0), thresVal); }
+    void SetSignificantThres(double thres) { thresSignifcant = thres; }
+    void SetFileName(std::string &fn) { inputFileName = fn; }
+    double CalcHammingDistBetwHaps(int h1, int h2) const;
+    void ConsCompatMap(std::set<std::pair<int, int>> &setCompatPairs) const;
+    int GetGenotypeNumOf(int geno) const;
+    int FindCellByName(const std::string &strName) const;
+    void ChangeGenosAtPositions(const std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces);
+    static bool AreSitesCompatInMap(const std::set<std::pair<int, int>> &setCompatPairs, int s1, int s2);
 
 protected:
-  void TrimCliquesMaxDiff(std::set<std::set<int> > &listCliques,
-                          int maxToKeep) const;
-  void ResetNames() { listNames.clear(); }
-  int GetNumNames() const { return listNames.size(); }
-  double GetSignificanceThres() const { return thresSignifcant; }
+    void TrimCliquesMaxDiff(std::set<std::set<int>> &listCliques, int maxToKeep) const;
+    void ResetNames() { listNames.clear(); }
+    int GetNumNames() const { return listNames.size(); }
+    double GetSignificanceThres() const { return thresSignifcant; }
 
 private:
-  std::vector<std::string> listNames;
-  std::vector<std::string> listSiteNames;
-  std::string inputFileName;
-  double thresSignifcant;
+    std::vector<std::string> listNames;
+    std::vector<std::string> listSiteNames;
+    std::string inputFileName;
+    double thresSignifcant;
 };
 
 // *************************************************************************************
 // genotypes: binary matrix
 
-class ScistHaplotypeMat : public ScistGenGenotypeMat {
+class ScistHaplotypeMat : public ScistGenGenotypeMat
+{
 public:
-  ScistHaplotypeMat();
-  virtual ~ScistHaplotypeMat() {}
-  virtual ScistGenGenotypeMat *Copy() const;
-  virtual ScistGenGenotypeMat *CreateNewMat() const {
-    return new ScistHaplotypeMat;
-  }
-  virtual bool ReadFromFile(std::ifstream &infile, int numSites, int numSCs,
-                            bool fSiteName);
-  virtual void SetSize(int numSCs, int numSites);
-  virtual int GetGenotypeAt(int sc, int site) const;
-  virtual int GetAltGenotypeAt(int sc, int site) const;
-  virtual void SetGenotypeAt(int sc, int site, int geno);
-  virtual void AddGenotypeAt(int sc, int site, int geno);
-  virtual double GetGenotypeProbAllele0At(int sc, int site) const;
-  virtual double GetGenotypeProbAt(int sc, int site, int geno) const {
-    if (geno == 0)
-      return GetGenotypeProbAllele0At(sc, site);
-    else
-      return 1.0 - GetGenotypeProbAllele0At(sc, site);
-  }
-  virtual void SetGenotypeProbAt(int sc, int site, double prob);
-  virtual void SetGenotypeProbOfGenoAt(int sc, int site, int geno, double prob);
-  virtual bool IsBinary() const { return true; }
-  virtual void FindMaximalCompatSites(
-      const std::vector<double> &wtSites,
-      std::vector<std::map<int, std::set<int> > > &listSetSitesCompat,
-      int maxNumSets,
-      const std::set<std::pair<int, int> > *pSetCompatPairs = NULL) const;
-  virtual int GetNumSites() const;
-  virtual int GetNumHaps() const;
-  virtual void GetMutRowsHapAtSite(int site, std::set<int> &setRows) const;
-  virtual void GetRowsWithGenoAtSite(int site, int geno,
-                                     std::set<int> &setRows) const;
-  virtual double GetScoreForGeno(int scIndex, int site, int genotype) const;
-  virtual bool IsNoninformative(int site) const;
-  virtual bool IsCompatible(int s1, int s2) const;
-  virtual std::string ConsTree() const;
-  virtual double SumLogProbs() const;
-  virtual void Dump() const;
-  virtual void OutputImput(const string *pStrDesc = NULL) const;
-  virtual void GetColMultiplicityMap(std::vector<int> &listColMulti) const {
-    matHaplotypes.GetColMultiplicityMap(listColMulti);
-  }
-  virtual bool IsProbSignificant(double prob, double thresVal) const;
-  BinaryMatrix &GetHapMat() { return matHaplotypes; }
+    ScistHaplotypeMat();
+    virtual ~ScistHaplotypeMat() {}
+    virtual ScistGenGenotypeMat *Copy() const;
+    virtual ScistGenGenotypeMat *CreateNewMat() const { return new ScistHaplotypeMat; }
+    virtual bool ReadFromFile(std::ifstream &infile, int numSites, int numSCs, bool fSiteName);
+    virtual void SetSize(int numSCs, int numSites);
+    virtual int GetGenotypeAt(int sc, int site) const;
+    virtual int GetAltGenotypeAt(int sc, int site) const;
+    virtual void SetGenotypeAt(int sc, int site, int geno);
+    virtual void AddGenotypeAt(int sc, int site, int geno);
+    virtual double GetGenotypeProbAllele0At(int sc, int site) const;
+    virtual double GetGenotypeProbAt(int sc, int site, int geno) const
+    {
+        if (geno == 0)
+            return GetGenotypeProbAllele0At(sc, site);
+        else
+            return 1.0 - GetGenotypeProbAllele0At(sc, site);
+    }
+    virtual void SetGenotypeProbAt(int sc, int site, double prob);
+    virtual void SetGenotypeProbOfGenoAt(int sc, int site, int geno, double prob);
+    virtual bool IsBinary() const { return true; }
+    virtual void FindMaximalCompatSites(const std::vector<double> &wtSites, std::vector<std::map<int, std::set<int>>> &listSetSitesCompat, int maxNumSets, const std::set<std::pair<int, int>> *pSetCompatPairs = NULL) const;
+    virtual int GetNumSites() const;
+    virtual int GetNumHaps() const;
+    virtual void GetMutRowsHapAtSite(int site, std::set<int> &setRows) const;
+    virtual void GetRowsWithGenoAtSite(int site, int geno, std::set<int> &setRows) const;
+    virtual double GetScoreForGeno(int scIndex, int site, int genotype) const;
+    virtual bool IsNoninformative(int site) const;
+    virtual bool IsCompatible(int s1, int s2) const;
+    virtual std::string ConsTree() const;
+    virtual double SumLogProbs() const;
+    virtual void Dump() const;
+    virtual void OutputImput(const string *pStrDesc = NULL) const;
+    virtual void GetColMultiplicityMap(std::vector<int> &listColMulti) const { matHaplotypes.GetColMultiplicityMap(listColMulti); }
+    virtual bool IsProbSignificant(double prob, double thresVal) const;
+    BinaryMatrix &GetHapMat() { return matHaplotypes; }
 
 private:
-  bool ReadFromFileHapProb(std::ifstream &infile, double &prob0);
+    bool ReadFromFileHapProb(std::ifstream &infile, double &prob0);
 
-  BinaryMatrix matHaplotypes;
-  std::vector<std::vector<double> > matHaplotypesProb0;
+    BinaryMatrix matHaplotypes;
+    std::vector<std::vector<double>> matHaplotypesProb0;
 };
 
 // *************************************************************************************
 // genotypes: ternary matrix
 
-class ScistTernaryMat : public ScistGenGenotypeMat {
+class ScistTernaryMat : public ScistGenGenotypeMat
+{
 public:
-  ScistTernaryMat();
-  virtual ~ScistTernaryMat() {}
-  virtual ScistGenGenotypeMat *Copy() const;
-  virtual ScistGenGenotypeMat *CreateNewMat() const {
-    return new ScistTernaryMat;
-  }
-  virtual bool ReadFromFile(std::ifstream &infile, int numSites, int numSCs,
-                            bool fSiteName);
-  virtual void SetSize(int numSCs, int numSites);
-  virtual int GetGenotypeAt(int sc, int site) const;
-  virtual int GetAltGenotypeAt(int sc, int site) const;
-  virtual void SetGenotypeAt(int sc, int site, int geno);
-  virtual void AddGenotypeAt(int sc, int site, int geno);
-  virtual double GetGenotypeProbAllele0At(int sc, int site) const;
-  virtual double GetGenotypeProbAt(int sc, int site, int geno) const;
-  virtual void SetGenotypeProbAt(int sc, int site, double prob);
-  virtual void SetGenotypeProbOfGenoAt(int sc, int site, int geno, double prob);
-  virtual bool IsBinary() const { return false; }
-  virtual void FindMaximalCompatSites(
-      const std::vector<double> &wtSites,
-      std::vector<std::map<int, std::set<int> > > &listSetSitesCompat,
-      int maxNumSets,
-      const std::set<std::pair<int, int> > *pSetCompatPairs = NULL) const;
-  virtual int GetNumSites() const;
-  virtual int GetNumHaps() const;
-  virtual void GetMutRowsHapAtSite(int site, std::set<int> &setRows) const;
-  virtual void GetRowsWithGenoAtSite(int site, int geno,
-                                     std::set<int> &setRows) const;
-  virtual double GetScoreForGeno(int scIndex, int site, int genotype) const;
-  virtual bool IsNoninformative(int site) const;
-  virtual bool IsCompatible(int s1, int s2) const;
-  virtual std::string ConsTree() const;
-  virtual double SumLogProbs() const;
-  virtual void Dump() const;
-  virtual void OutputImput(const string *pStrDesc = NULL) const;
-  virtual void GetColMultiplicityMap(std::vector<int> &listColMulti) const {
-    matTernary.GetColMultiplicityMap(listColMulti);
-  }
-  virtual bool IsProbSignificant(double prob, double thresVal) const;
+    ScistTernaryMat();
+    virtual ~ScistTernaryMat() {}
+    virtual ScistGenGenotypeMat *Copy() const;
+    virtual ScistGenGenotypeMat *CreateNewMat() const { return new ScistTernaryMat; }
+    virtual bool ReadFromFile(std::ifstream &infile, int numSites, int numSCs, bool fSiteName);
+    virtual void SetSize(int numSCs, int numSites);
+    virtual int GetGenotypeAt(int sc, int site) const;
+    virtual int GetAltGenotypeAt(int sc, int site) const;
+    virtual void SetGenotypeAt(int sc, int site, int geno);
+    virtual void AddGenotypeAt(int sc, int site, int geno);
+    virtual double GetGenotypeProbAllele0At(int sc, int site) const;
+    virtual double GetGenotypeProbAt(int sc, int site, int geno) const;
+    virtual void SetGenotypeProbAt(int sc, int site, double prob);
+    virtual void SetGenotypeProbOfGenoAt(int sc, int site, int geno, double prob);
+    virtual bool IsBinary() const { return false; }
+    virtual void FindMaximalCompatSites(const std::vector<double> &wtSites, std::vector<std::map<int, std::set<int>>> &listSetSitesCompat, int maxNumSets, const std::set<std::pair<int, int>> *pSetCompatPairs = NULL) const;
+    virtual int GetNumSites() const;
+    virtual int GetNumHaps() const;
+    virtual void GetMutRowsHapAtSite(int site, std::set<int> &setRows) const;
+    virtual void GetRowsWithGenoAtSite(int site, int geno, std::set<int> &setRows) const;
+    virtual double GetScoreForGeno(int scIndex, int site, int genotype) const;
+    virtual bool IsNoninformative(int site) const;
+    virtual bool IsCompatible(int s1, int s2) const;
+    virtual std::string ConsTree() const;
+    virtual double SumLogProbs() const;
+    virtual void Dump() const;
+    virtual void OutputImput(const string *pStrDesc = NULL) const;
+    virtual void GetColMultiplicityMap(std::vector<int> &listColMulti) const { matTernary.GetColMultiplicityMap(listColMulti); }
+    virtual bool IsProbSignificant(double prob, double thresVal) const;
 
 private:
-  bool ReadFromFileTernaryProb(std::ifstream &infile, double &prob0,
-                               double &prob1);
-  void ConsHapMatForDistCalc(BinaryMatrix &matHaplotypes) const;
+    bool ReadFromFileTernaryProb(std::ifstream &infile, double &prob0, double &prob1);
+    void ConsHapMatForDistCalc(BinaryMatrix &matHaplotypes) const;
 
-  GenotypeMatrix matTernary;
-  std::vector<std::vector<std::pair<double, double> > > matTernaryProbs;
+    GenotypeMatrix matTernary;
+    std::vector<std::vector<std::pair<double, double>>> matTernaryProbs;
 };
 
 #endif /* ScistGenotype_hpp */
diff --git a/trisicell/external/scistree/ScistPerfPhyImp.cpp b/trisicell/external/scistree/ScistPerfPhyImp.cpp
index 5c9ed58..cb604f1 100644
--- a/trisicell/external/scistree/ScistPerfPhyImp.cpp
+++ b/trisicell/external/scistree/ScistPerfPhyImp.cpp
@@ -7,1111 +7,1149 @@
 //
 
 #include "ScistPerfPhyImp.hpp"
-#include "MarginalTree.h"
-#include "PhylogenyTree.h"
-#include "RBT.h"
 #include "ScistGenotype.hpp"
-#include "TreeBuilder.h"
 #include "Utils3.h"
 #include "Utils4.h"
-#include "UtilsNumerical.h"
-#include <cmath>
+#include "TreeBuilder.h"
+#include "MarginalTree.h"
+#include "RBT.h"
+#include "PhylogenyTree.h"
 #include <iomanip>
+#include <cmath>
+#include "UtilsNumerical.h"
+#include <chrono>
 
 const int MAX_SPR_OP = 1;
 
 // *************************************************************************************
 // Utiltiies
 
-void OutputMutationTree(const char *filenameMT, const string &strMutTree,
-                        bool fLabel) {
-  PhylogenyTreeBasic treeMut;
-  treeMut.ConsOnNewickEdgeLabelTree(strMutTree);
-  if (fLabel) {
-    treeMut.OutputGML(filenameMT);
-  } else {
-    treeMut.OutputGMLNoLabel(filenameMT);
-  }
+void OutputMutationTree(const char *filenameMT, const string &strMutTree, bool fLabel)
+{
+    PhylogenyTreeBasic treeMut;
+    treeMut.ConsOnNewickEdgeLabelTree(strMutTree);
+    if (fLabel)
+    {
+        treeMut.OutputGML(filenameMT);
+    }
+    else
+    {
+        treeMut.OutputGMLNoLabel(filenameMT);
+    }
 }
 
 // *************************************************************************************
 // Build phylogeny by tree search with branch length
 
-ScistFullPerfPhyMLE ::ScistFullPerfPhyMLE(ScistGenGenotypeMat &genos)
-    : genosInput(genos), fVerbose(false), pMargTreeOptBrLen(NULL),
-      brOptIndex(-1) {
-  Init();
+ScistFullPerfPhyMLE ::ScistFullPerfPhyMLE(ScistGenGenotypeMat &genos) : genosInput(genos), fVerbose(false), pMargTreeOptBrLen(NULL), brOptIndex(-1)
+{
+    Init();
 }
 
-void ScistFullPerfPhyMLE ::Infer() {
-  set<ScistPerfPhyCluster> setClusAllGuide;
-  this->treeGuide.GetAllClusters(setClusAllGuide);
-  string strTreeOpt = ConsTreeFromSetClusters(setClusAllGuide);
-
-  MarginalTree treeOpt;
-  ReadinMarginalTreesNewickWLenString(strTreeOpt, this->genosInput.GetNumHaps(),
-                                      treeOpt);
-  treeOpt.InitUnitEdgelen();
-
-  // double loglikeliOptInit = CalcLikelihoodOf(treeOpt);
-
-  // optimize branch length
-  double loglikeliOptBr = OptBranchLens(treeOpt);
-  strTreeOpt = treeOpt.GetNewickSorted(true);
-  // cout << "Initial tree: "  << treeOpt.GetNewick() << ", log-likelihood: " <<
-  // loglikeliOptBr << endl;
-
-  set<string> setTreeSearchedBefore;
-  setTreeSearchedBefore.insert(strTreeOpt);
-
-  // now search for neighborhood of the current tree to optimize the tree
-  while (true) {
-    set<string> setNgbrTrees;
-    // GetNgbrTreesFromSPR( this->genosInput.GetNumHaps(), strTreeOpt,
-    // setNgbrTrees );
-    ScistPerfPhyMLE ::GetNgbrTreesFrom(this->genosInput.GetNumHaps(),
-                                       strTreeOpt, setNgbrTrees);
-    if (fVerbose) {
-      cout << "Current best likelihood: " << loglikeliOptBr
-           << ", current tree: " << treeOpt.GetNewickSorted(true)
-           << ", tree neighborhood size: " << setNgbrTrees.size() << endl;
-    }
-    bool fCont = false;
-    for (set<string>::iterator it = setNgbrTrees.begin();
-         it != setNgbrTrees.end(); ++it) {
-      if (setTreeSearchedBefore.find(*it) != setTreeSearchedBefore.end()) {
-        continue;
-      }
-      setTreeSearchedBefore.insert(*it);
-
-      // cout << "Neighbor tree: " << *it << endl;
-      MarginalTree treeStep;
-      ReadinMarginalTreesNewickWLenString(*it, this->genosInput.GetNumHaps(),
-                                          treeStep);
-      // treeStep.InitUnitEdgelen();
-      // cout << "treeStep: " << treeStep.GetNewick() << endl;
-      double loglikeliStep = OptBranchLens(treeStep);
-      // double loglikeliStep = CalcLikelihoodOf( treeStep );
-      // cout << ", loglikeliStep (w/ branch length optimization): " <<
-      // loglikeliStep << endl;
-      if (loglikeliStep > loglikeliOptBr) {
-        // cout << "BETTER.\n";
-        loglikeliOptBr = loglikeliStep;
-        strTreeOpt = *it;
-        treeOpt = treeStep;
-        fCont = true;
-      }
-    }
-    if (fCont == false) {
-      break;
+void ScistFullPerfPhyMLE ::Infer()
+{
+    set<ScistPerfPhyCluster> setClusAllGuide;
+    this->treeGuide.GetAllClusters(setClusAllGuide);
+    string strTreeOpt = ConsTreeFromSetClusters(setClusAllGuide);
+
+    MarginalTree treeOpt;
+    ReadinMarginalTreesNewickWLenString(strTreeOpt, this->genosInput.GetNumHaps(), treeOpt);
+    treeOpt.InitUnitEdgelen();
+
+    //double loglikeliOptInit = CalcLikelihoodOf(treeOpt);
+
+    // optimize branch length
+    double loglikeliOptBr = OptBranchLens(treeOpt);
+    strTreeOpt = treeOpt.GetNewickSorted(true);
+    //cout << "Initial tree: "  << treeOpt.GetNewick() << ", log-likelihood: " << loglikeliOptBr << endl;
+
+    set<string> setTreeSearchedBefore;
+    setTreeSearchedBefore.insert(strTreeOpt);
+
+    // now search for neighborhood of the current tree to optimize the tree
+    while (true)
+    {
+        set<string> setNgbrTrees;
+        //GetNgbrTreesFromSPR( this->genosInput.GetNumHaps(), strTreeOpt, setNgbrTrees );
+        ScistPerfPhyMLE ::GetNgbrTreesFrom(this->genosInput.GetNumHaps(), strTreeOpt, setNgbrTrees);
+        if (fVerbose)
+        {
+            cout << "Current best likelihood: " << loglikeliOptBr << ", current tree: " << treeOpt.GetNewickSorted(true) << ", tree neighborhood size: " << setNgbrTrees.size() << endl;
+        }
+        bool fCont = false;
+        for (set<string>::iterator it = setNgbrTrees.begin(); it != setNgbrTrees.end(); ++it)
+        {
+            if (setTreeSearchedBefore.find(*it) != setTreeSearchedBefore.end())
+            {
+                continue;
+            }
+            setTreeSearchedBefore.insert(*it);
+
+            //cout << "Neighbor tree: " << *it << endl;
+            MarginalTree treeStep;
+            ReadinMarginalTreesNewickWLenString(*it, this->genosInput.GetNumHaps(), treeStep);
+            //treeStep.InitUnitEdgelen();
+            //cout << "treeStep: " << treeStep.GetNewick() << endl;
+            double loglikeliStep = OptBranchLens(treeStep);
+            //double loglikeliStep = CalcLikelihoodOf( treeStep );
+            //cout << ", loglikeliStep (w/ branch length optimization): " << loglikeliStep << endl;
+            if (loglikeliStep > loglikeliOptBr)
+            {
+                //cout << "BETTER.\n";
+                loglikeliOptBr = loglikeliStep;
+                strTreeOpt = *it;
+                treeOpt = treeStep;
+                fCont = true;
+            }
+        }
+        if (fCont == false)
+        {
+            break;
+        }
     }
-  }
 
-  cout << "**** Optimal cost: " << loglikeliOptBr << endl;
-  cout << "Constructed single cell phylogeny: "
-       << treeOpt.GetNewickSorted(false) << endl;
-  cout << "With branch length: " << treeOpt.GetNewickSorted(true) << endl;
+    cout << "**** Optimal cost: " << loglikeliOptBr << endl;
+    cout << "Constructed single cell phylogeny: " << treeOpt.GetNewickSorted(false) << endl;
+    cout << "With branch length: " << treeOpt.GetNewickSorted(true) << endl;
 }
 
-void ScistFullPerfPhyMLE ::Init() {
-  //
-  cacheProbMutClades.resize(genosInput.GetNumSites());
-  // get all clusters
-  // listClusMutsInput.clear();
-  // for(int s=0; s<genosInput.GetNumSites(); ++s)
-  //{
-  //    set<int> muts;
-  //    genosInput.GetMutRowsHapAtSite(s, muts);
-  //    ScistPerfPhyCluster clus(muts);
-  //    listClusMutsInput.push_back(clus);
-  //}
-  listClusMutsInputHetero.clear();
-  listClusMutsInputHomo.clear();
-  for (int s = 0; s < genosInput.GetNumSites(); ++s) {
-    set<int> muts;
-    genosInput.GetRowsWithGenoAtSite(s, 1, muts);
-    ScistPerfPhyCluster clus(muts);
-    listClusMutsInputHetero.push_back(clus);
-
-    set<int> muts2;
-    genosInput.GetRowsWithGenoAtSite(s, 2, muts2);
-    ScistPerfPhyCluster clus2(muts2);
-    listClusMutsInputHomo.push_back(clus2);
-  }
-
-  this->genosInput.GetColMultiplicityMap(listInputColMulti);
-
-  // construct NJ tree as the initial tree
-  string strNJ = this->genosInput.ConsNJTreeZeroRoot();
-  this->treeGuide.Init(strNJ);
-}
-
-double ScistFullPerfPhyMLE ::OptBranchLens(MarginalTree &tree) {
-  //
-  this->pMargTreeOptBrLen = &tree;
+void ScistFullPerfPhyMLE ::Init()
+{
+    //
+    cacheProbMutClades.resize(genosInput.GetNumSites());
+    // get all clusters
+    //listClusMutsInput.clear();
+    //for(int s=0; s<genosInput.GetNumSites(); ++s)
+    //{
+    //    set<int> muts;
+    //    genosInput.GetMutRowsHapAtSite(s, muts);
+    //    ScistPerfPhyCluster clus(muts);
+    //    listClusMutsInput.push_back(clus);
+    //}
+    listClusMutsInputHetero.clear();
+    listClusMutsInputHomo.clear();
+    for (int s = 0; s < genosInput.GetNumSites(); ++s)
+    {
+        set<int> muts;
+        genosInput.GetRowsWithGenoAtSite(s, 1, muts);
+        ScistPerfPhyCluster clus(muts);
+        listClusMutsInputHetero.push_back(clus);
+
+        set<int> muts2;
+        genosInput.GetRowsWithGenoAtSite(s, 2, muts2);
+        ScistPerfPhyCluster clus2(muts2);
+        listClusMutsInputHomo.push_back(clus2);
+    }
 
-  const double MIN_BR_LEN = 0.01;
-  const double MAX_BR_LEN = 10.0;
-  const double TOLNUM = 0.2;
+    this->genosInput.GetColMultiplicityMap(listInputColMulti);
 
-  double loglikeliRes = -1.0 * HAP_MAX_INT;
+    // construct NJ tree as the initial tree
+    string strNJ = this->genosInput.ConsNJTreeZeroRoot();
+    this->treeGuide.Init(strNJ);
+}
 
-  // optimize branch of each once and only once
-  for (int br = 0; br < tree.GetTotNodesNum(); ++br) {
-    if (br == tree.GetRoot()) {
-      continue;
-    }
-    this->brOptIndex = br;
-    double brLen = tree.GetEdgeLen(br);
-    double brNew = brLen;
-    double likeliMax =
-        -1.0 * Func1DMinBrent(MIN_BR_LEN, brLen, MAX_BR_LEN, TOLNUM, &brNew);
-    if (likeliMax > loglikeliRes) {
-      loglikeliRes = likeliMax;
-      tree.SetBranchLen(br, brNew);
-    } else {
-      tree.SetBranchLen(br, brLen);
+double ScistFullPerfPhyMLE ::OptBranchLens(MarginalTree &tree)
+{
+    //
+    this->pMargTreeOptBrLen = &tree;
+
+    const double MIN_BR_LEN = 0.01;
+    const double MAX_BR_LEN = 10.0;
+    const double TOLNUM = 0.2;
+
+    double loglikeliRes = -1.0 * HAP_MAX_INT;
+
+    // optimize branch of each once and only once
+    for (int br = 0; br < tree.GetTotNodesNum(); ++br)
+    {
+        if (br == tree.GetRoot())
+        {
+            continue;
+        }
+        this->brOptIndex = br;
+        double brLen = tree.GetEdgeLen(br);
+        double brNew = brLen;
+        double likeliMax = -1.0 * Func1DMinBrent(MIN_BR_LEN, brLen, MAX_BR_LEN, TOLNUM, &brNew);
+        if (likeliMax > loglikeliRes)
+        {
+            loglikeliRes = likeliMax;
+            tree.SetBranchLen(br, brNew);
+        }
+        else
+        {
+            tree.SetBranchLen(br, brLen);
+        }
     }
-  }
-  return loglikeliRes;
+    return loglikeliRes;
 }
 
-double ScistFullPerfPhyMLE ::EvaluateAt(double pt, void *pParam) {
-  //
-  YW_ASSERT_INFO(pMargTreeOptBrLen != NULL, "Tree to opt branch: null");
-  YW_ASSERT_INFO(brOptIndex >= 0, "Branch opt not set");
-  pMargTreeOptBrLen->SetBranchLen(brOptIndex, pt);
-  return -1.0 * CalcLikelihoodOf(*pMargTreeOptBrLen);
+double ScistFullPerfPhyMLE ::EvaluateAt(double pt, void *pParam)
+{
+    //
+    YW_ASSERT_INFO(pMargTreeOptBrLen != NULL, "Tree to opt branch: null");
+    YW_ASSERT_INFO(brOptIndex >= 0, "Branch opt not set");
+    pMargTreeOptBrLen->SetBranchLen(brOptIndex, pt);
+    return -1.0 * CalcLikelihoodOf(*pMargTreeOptBrLen);
 }
 
-double ScistFullPerfPhyMLE ::CalcLikelihoodOf(MarginalTree &tree) const {
-  set<pair<ScistPerfPhyCluster, ScistPerfPhyCluster> > setClusDone;
-  double res = 0.0;
-
-  vector<set<int> > listClades;
-  tree.ConsDecedentLeavesInfoLabels(listClades);
-  for (int i = 0; i < (int)listClades.size(); ++i) {
-    DecAllNumInSet(listClades[i]);
-    // cout << "Tree clade: ";
-    // DumpIntSet(listClades[i]);
-  }
-  double totEdgeLen = tree.GetTotEdgeLen();
-  ScistPerfPhyProbOnTree sppp(this->genosInput, tree);
-
-  for (int site = 0; site < genosInput.GetNumSites(); ++site) {
-    pair<ScistPerfPhyCluster, ScistPerfPhyCluster> pp(
-        listClusMutsInputHetero[site], listClusMutsInputHomo[site]);
-    if (setClusDone.find(pp) != setClusDone.end()) {
-      continue;
+double ScistFullPerfPhyMLE ::CalcLikelihoodOf(MarginalTree &tree) const
+{
+    set<pair<ScistPerfPhyCluster, ScistPerfPhyCluster>> setClusDone;
+    double res = 0.0;
+
+    vector<set<int>> listClades;
+    tree.ConsDecedentLeavesInfoLabels(listClades);
+    for (int i = 0; i < (int)listClades.size(); ++i)
+    {
+        DecAllNumInSet(listClades[i]);
+        //cout << "Tree clade: ";
+        //DumpIntSet(listClades[i]);
+    }
+    double totEdgeLen = tree.GetTotEdgeLen();
+    ScistPerfPhyProbOnTree sppp(this->genosInput, tree);
+
+    for (int site = 0; site < genosInput.GetNumSites(); ++site)
+    {
+        pair<ScistPerfPhyCluster, ScistPerfPhyCluster> pp(listClusMutsInputHetero[site], listClusMutsInputHomo[site]);
+        if (setClusDone.find(pp) != setClusDone.end())
+        {
+            continue;
+        }
+        int multi = this->listInputColMulti[site];
+        double loglikeliSite = CalcLikelihoodOf(sppp, site, tree, totEdgeLen, listClades);
+        res += loglikeliSite * multi;
+        setClusDone.insert(pp);
     }
-    int multi = this->listInputColMulti[site];
-    double loglikeliSite =
-        CalcLikelihoodOf(sppp, site, tree, totEdgeLen, listClades);
-    res += loglikeliSite * multi;
-    setClusDone.insert(pp);
-  }
-
-  return res;
+
+    return res;
 }
 
-double ScistFullPerfPhyMLE ::CalcLikelihoodOf(
-    ScistPerfPhyProbOnTree &sppp, int site, MarginalTree &tree,
-    double totEdgeLen, const vector<set<int> > &listClades) const {
-  return sppp.CalcProbForSite(site, totEdgeLen, listClades);
+double ScistFullPerfPhyMLE ::CalcLikelihoodOf(ScistPerfPhyProbOnTree &sppp, int site, MarginalTree &tree, double totEdgeLen, const vector<set<int>> &listClades) const
+{
+    return sppp.CalcProbForSite(site, totEdgeLen, listClades);
 }
 
-std::string ScistFullPerfPhyMLE ::ConsTreeFromSetClusters(
-    const std::set<ScistPerfPhyCluster> &setClusters) const {
-  //
-  // now construct tree
-  ScistInfPerfPhyUtils treeBuild;
-  map<int, ScistPerfPhyCluster> mapPickedClus;
-  int s = 0;
-  for (set<ScistPerfPhyCluster>::iterator it = setClusters.begin();
-       it != setClusters.end(); ++it) {
-    mapPickedClus[s] = *it;
-    ++s;
-  }
-  string strTree =
-      treeBuild.ConsTreeWCombDistClus(this->genosInput, mapPickedClus);
-  return strTree;
+std::string ScistFullPerfPhyMLE ::ConsTreeFromSetClusters(const std::set<ScistPerfPhyCluster> &setClusters) const
+{
+    //
+    // now construct tree
+    ScistInfPerfPhyUtils treeBuild;
+    map<int, ScistPerfPhyCluster> mapPickedClus;
+    int s = 0;
+    for (set<ScistPerfPhyCluster>::iterator it = setClusters.begin(); it != setClusters.end(); ++it)
+    {
+        mapPickedClus[s] = *it;
+        ++s;
+    }
+    string strTree = treeBuild.ConsTreeWCombDistClus(this->genosInput, mapPickedClus);
+    return strTree;
 }
 
 // *************************************************************************************
 // Build phylogeny by tree search
 
-ScistPerfPhyMLE ::ScistPerfPhyMLE(ScistGenGenotypeMat &genos)
-    : genosInput(genos), fVerbose(false), fOptBrLen(false), fOutput(true),
-      fOutputPPWithEdgeLabels(false), fOutputLabel(true), fSPR(false),
-      maxSPRNum(MAX_SPR_OP) {
-  Init();
+ScistPerfPhyMLE ::ScistPerfPhyMLE(ScistGenGenotypeMat &genos) : genosInput(genos), fVerbose(false), fOptBrLen(false), fOutput(true), fOutputPPWithEdgeLabels(false), fOutputLabel(true), fSPR(false), maxSPRNum(MAX_SPR_OP)
+{
+    Init();
 }
 
-double ScistPerfPhyMLE ::Infer(
-    std::set<std::pair<std::pair<int, int>, int> > *plistChangedPlaces,
-    std::string *pstrTreeNW) {
-  // cout << "ScistPerfPhyMLE :: Infer\n";
-  //
-  set<ScistPerfPhyCluster> setClusAllGuide;
-  this->treeGuide.GetAllClusters(setClusAllGuide);
-  // cout << "Number of clusters: " << setClusAllGuide.size() << endl;
-  string strTreeOpt = ConsTreeFromSetClusters(setClusAllGuide);
-  // cout << "strTreeOpt: " << strTreeOpt << endl;
-  // set<ScistPerfPhyCluster> setClusAllGuideUse;
-  // GetClustersFromTree(strTreeOpt, setClusAllGuideUse);
-  std::vector<pair<ScistPerfPhyCluster, ScistPerfPhyCluster> >
-      listChangedClustersOpt;
-  // double loglikeliBest = ScoreSetClusters( setClusAllGuideUse,
-  // listChangedClustersOpt );
-  double loglikeliBest = ScoreTree(strTreeOpt, listChangedClustersOpt);
-  // cout << "Init likelihood: " << loglikeliBest << endl;
-  set<string> setTreeSearchedBefore;
-  setTreeSearchedBefore.insert(strTreeOpt);
-
-  // now search for neighborhood of the current tree to optimize the tree
-  int numSPRPerformed = 0;
-  bool fNNI = true;
-  while (true) {
-    // if(fNNI)
-    //{
-    // cout << "NNI mode\n";
-    //}
-    // else
-    //{
-    // cout << "SPR mode\n";
-    //}
-
-    set<string> setNgbrTrees;
-    if (fNNI == false && fSPR && numSPRPerformed <= maxSPRNum) {
-      GetNgbrTreesFromSPR(this->genosInput.GetNumHaps(), strTreeOpt,
-                          setNgbrTrees);
-      ++numSPRPerformed;
-      // fNNI = true;
-    } else if (fNNI == true) {
-      GetNgbrTreesFrom(this->genosInput.GetNumHaps(), strTreeOpt, setNgbrTrees);
-    } else {
-      break;
+double ScistPerfPhyMLE ::Infer(std::set<std::pair<std::pair<int, int>, int>> *plistChangedPlaces, std::string *pstrTreeNW)
+{
+    //cout << "ScistPerfPhyMLE :: Infer\n";
+    //
+    set<ScistPerfPhyCluster> setClusAllGuide;
+    this->treeGuide.GetAllClusters(setClusAllGuide);
+    //cout << "Number of clusters: " << setClusAllGuide.size() << endl;
+    string strTreeOpt = ConsTreeFromSetClusters(setClusAllGuide);
+    //cout << "strTreeOpt: " << strTreeOpt << endl;
+    //set<ScistPerfPhyCluster> setClusAllGuideUse;
+    //GetClustersFromTree(strTreeOpt, setClusAllGuideUse);
+    std::vector<pair<ScistPerfPhyCluster, ScistPerfPhyCluster>> listChangedClustersOpt;
+    //double loglikeliBest = ScoreSetClusters( setClusAllGuideUse, listChangedClustersOpt );
+    double loglikeliBest = ScoreTree(strTreeOpt, listChangedClustersOpt);
+    //cout << "Init likelihood: " << loglikeliBest << endl;
+    set<string> setTreeSearchedBefore;
+    setTreeSearchedBefore.insert(strTreeOpt);
+
+    // now search for neighborhood of the current tree to optimize the tree
+    int numSPRPerformed = 0;
+    bool fNNI = true;
+
+    // thread pool
+    if (fVerbose)
+    {
+        std::cout << "Starting pool with " << (numThreads > 1 ? numThreads : 1) << " threads" << endl; // report the clamped size the pool is actually built with
     }
-    if (fVerbose) {
-      cout << "Current best likelihood: " << loglikeliBest
-           << ", current cost: " << CalcMaxProbUpperBound() - loglikeliBest
-           << ", opt tree: " << strTreeOpt
-           << ", tree neighborhood size: " << setNgbrTrees.size() << endl;
+    ctpl::thread_pool p(numThreads > 1 ? numThreads : 1);
+
+    std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
+    std::cout << "Starting ScistPerfPhyMLE calculation, while true..." << std::endl;
+    while (true)
+    {
+
+        set<string> setNgbrTrees;
+        if (fNNI == false && fSPR && numSPRPerformed <= maxSPRNum)
+        {
+            GetNgbrTreesFromSPR(this->genosInput.GetNumHaps(), strTreeOpt, setNgbrTrees);
+            ++numSPRPerformed;
+            //fNNI = true;
+        }
+        else if (fNNI == true)
+        {
+            GetNgbrTreesFrom(this->genosInput.GetNumHaps(), strTreeOpt, setNgbrTrees);
+        }
+        else
+        {
+            break;
+        }
+        if (fVerbose)
+        {
+            cout << "Current best likelihood: " << loglikeliBest << ", current cost: " << CalcMaxProbUpperBound() - loglikeliBest << ", opt tree: " << strTreeOpt << ", tree neighborhood size: " << setNgbrTrees.size() << endl;
+        }
+        //cout << "Current opt tree: " << strTreeOpt << endl;
+        bool fCont = false;
+
+        // allocate threadpool results vector
+        typedef std::tuple<double, std::set<std::string>::iterator, std::vector<std::pair<ScistPerfPhyCluster, ScistPerfPhyCluster>>> resultType;
+        std::vector<std::future<resultType>> results;
+        results.reserve(setNgbrTrees.size());
+
+        // queue calculations
+        for (set<string>::iterator it = setNgbrTrees.begin(); it != setNgbrTrees.end(); ++it)
+        {
+            if (setTreeSearchedBefore.find(*it) != setTreeSearchedBefore.end())
+            {
+                continue;
+            }
+            setTreeSearchedBefore.insert(*it);
+
+            results.push_back(p.push([this, it](int)
+                                     {
+                                         std::vector<std::pair<ScistPerfPhyCluster, ScistPerfPhyCluster>> listChangedClustersStep;
+                                         double loglikeliStep = this->ScoreTree(*it, listChangedClustersStep);
+                                         return (resultType(loglikeliStep, it, listChangedClustersStep));
+                                     }));
+        }
+
+        // screen for optimal trees
+        for (auto &i : results)
+        {
+            resultType res = i.get();
+            double loglikeliStep = std::get<0>(res);
+            if (loglikeliStep > loglikeliBest)
+            {
+                loglikeliBest = loglikeliStep;
+                strTreeOpt = *(std::get<1>(res));
+                listChangedClustersOpt = (std::get<2>(res));
+                fCont = true;
+            }
+        }
+
+        if (fCont == false)
+        {
+            if (fNNI == false)
+            {
+                break;
+            }
+
+            fNNI = false;
+            //break;
+        }
+        else
+        {
+            fNNI = true;
+        }
     }
-    // cout << "Current opt tree: " << strTreeOpt << endl;
-    bool fCont = false;
-    for (set<string>::iterator it = setNgbrTrees.begin();
-         it != setNgbrTrees.end(); ++it) {
-      if (setTreeSearchedBefore.find(*it) != setTreeSearchedBefore.end()) {
-        continue;
-      }
-      setTreeSearchedBefore.insert(*it);
-
-      // cout << "Neighbor tree: " << *it << endl;
-      // set<ScistPerfPhyCluster> setClus;
-      // GetClustersFromTree(*it, setClus);
-      vector<pair<ScistPerfPhyCluster, ScistPerfPhyCluster> >
-          listChangedClustersStep;
-      // double loglikeliStep = ScoreSetClusters( setClus,
-      // listChangedClustersStep);
-      double loglikeliStep = ScoreTree(*it, listChangedClustersStep);
-      // cout << ", loglikeliStep: " << loglikeliStep << ", cost: " <<
-      // CalcMaxProbUpperBound()- loglikeliStep << endl; if( loglikeliStep <
-      // loglikeliBest )
-      if (loglikeliStep > loglikeliBest) {
-        // cout << "BETTER.\n";
-        loglikeliBest = loglikeliStep;
-        strTreeOpt = *it;
-        listChangedClustersOpt = listChangedClustersStep;
-        fCont = true;
-      }
+    // END of WHILE loop
+    std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
+    std::cout << "...out of while loop" << std::endl;
+    std::cout << "Time elapsed: " << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count() << " [seconds]" << std::endl;
+
+    // output the final tree
+    std::set<std::pair<std::pair<int, int>, int>> listChangedPlaces;
+    for (int site = 0; site < this->genosInput.GetNumSites(); ++site)
+    {
+        FindChangedGenos(site, listChangedClustersOpt[site], listChangedPlaces);
     }
-    if (fCont == false) {
-      if (fNNI == false) {
-        break;
-      }
-
-      fNNI = false;
-      // break;
-    } else {
-      fNNI = true;
+    if (plistChangedPlaces != NULL)
+    {
+        *plistChangedPlaces = listChangedPlaces;
     }
-  }
-  // output the final tree
-  std::set<std::pair<std::pair<int, int>, int> > listChangedPlaces;
-  for (int site = 0; site < this->genosInput.GetNumSites(); ++site) {
-    FindChangedGenos(site, listChangedClustersOpt[site], listChangedPlaces);
-  }
-  if (plistChangedPlaces != NULL) {
-    *plistChangedPlaces = listChangedPlaces;
-  }
-  if (pstrTreeNW != NULL) {
-    *pstrTreeNW = strTreeOpt;
-  }
-
-  if (fVerbose) {
-    if (fOutput) {
-      cout << "Genotypes called by maximal single position probability\n";
-      const string strDesc = "Single-site maximal probability genotypes";
-      this->genosInput.OutputImput(&strDesc);
+    if (pstrTreeNW != NULL)
+    {
+        *pstrTreeNW = strTreeOpt;
     }
 
-    cout << "List of corrected genotypes (site, cell, new genotype) in base-1: "
-            "\n";
-    for (set<pair<pair<int, int>, int> >::iterator it =
-             listChangedPlaces.begin();
-         it != listChangedPlaces.end(); ++it) {
-      cout << "[ " << setw(6) << it->first.second + 1 << " " << setw(6)
-           << it->first.first + 1 << " ]: " << it->second << endl;
+    if (fVerbose)
+    {
+        if (fOutput)
+        {
+            cout << "Genotypes called by maximal single position probability\n";
+            const string strDesc = "Single-site maximal probability genotypes";
+            this->genosInput.OutputImput(&strDesc);
+        }
+
+        cout << "List of corrected genotypes (site, cell, new genotype) in base-1: \n";
+        for (set<pair<pair<int, int>, int>>::iterator it = listChangedPlaces.begin(); it != listChangedPlaces.end(); ++it)
+        {
+            cout << "[ " << setw(6) << it->first.second + 1 << " " << setw(6) << it->first.first + 1 << " ]: " << it->second << endl;
+        }
     }
-  }
-
-  if (fOutput) {
-    // output the matrix
-    ScistGenGenotypeMat *pMatRes = this->genosInput.Copy();
-    pMatRes->ChangeGenosAtPositions(listChangedPlaces);
-    if (fVerbose) {
-      cout << "Called genotypes\n";
-      pMatRes->OutputImput();
+
+    if (fOutput)
+    {
+        // output the matrix
+        ScistGenGenotypeMat *pMatRes = this->genosInput.Copy();
+        pMatRes->ChangeGenosAtPositions(listChangedPlaces);
+        if (fVerbose)
+        {
+            cout << "Called genotypes\n";
+            pMatRes->OutputImput();
+        }
+        if (fOutputPPWithEdgeLabels)
+        {
+            ScistHaplotypeMat *pMatResHap = dynamic_cast<ScistHaplotypeMat *>(pMatRes);
+            if (pMatResHap == NULL)
+            {
+                cout << "** Right now, only output perfect phylogeny for binary genotypes\n";
+            }
+            else
+            {
+                string strTreeEdgeLabel = ConsRootedPerfectPhylogenyFromMat(pMatResHap->GetHapMat(), true, true);
+                //cout << "** Perfect phylogeny (with sites labeled on edges) from the imputed genotypes: " << strTreeEdgeLabel << endl;
+
+                string strMutTree = ConsEdgeLabeTree(strTreeEdgeLabel);
+                string strMutTreeConv = ConvMutTreeStr(strMutTree);
+                cout << "^^ Mutation tree: " << strMutTreeConv << endl;
+
+                // output mutation tree file
+                OutputMutationTree(this->strMutTreeFileName.c_str(), strMutTreeConv, this->fOutputLabel);
+            }
+        }
+
+        delete pMatRes;
     }
-    if (fOutputPPWithEdgeLabels) {
-      ScistHaplotypeMat *pMatResHap =
-          dynamic_cast<ScistHaplotypeMat *>(pMatRes);
-      if (pMatResHap == NULL) {
-        cout << "** Right now, only output perfect phylogeny for binary "
-                "genotypes\n";
-      } else {
-        string strTreeEdgeLabel = ConsRootedPerfectPhylogenyFromMat(
-            pMatResHap->GetHapMat(), true, true);
-        // cout << "** Perfect phylogeny (with sites labeled on edges) from the
-        // imputed genotypes: " << strTreeEdgeLabel << endl;
-
-        string strMutTree = ConsEdgeLabeTree(strTreeEdgeLabel);
-        string strMutTreeConv = ConvMutTreeStr(strMutTree);
-        cout << "^^ Mutation tree: " << strMutTreeConv << endl;
-
-        // output mutation tree file
-        OutputMutationTree(this->strMutTreeFileName.c_str(), strMutTreeConv,
-                           this->fOutputLabel);
-      }
+
+    // change genotype
+    for (set<pair<pair<int, int>, int>>::iterator it = listChangedPlaces.begin(); it != listChangedPlaces.end(); ++it)
+    {
+        this->genosInput.SetGenotypeAt(it->first.first, it->first.second, it->second);
     }
 
-    delete pMatRes;
-  }
-
-  // change genotype
-  for (set<pair<pair<int, int>, int> >::iterator it = listChangedPlaces.begin();
-       it != listChangedPlaces.end(); ++it) {
-    this->genosInput.SetGenotypeAt(it->first.first, it->first.second,
-                                   it->second);
-  }
-
-  double res = loglikeliBest;
-
-  if (fOutput) {
-    cout << "**** Maximum log-likelihood: " << loglikeliBest
-         << ", number of changed genotypes: " << listChangedPlaces.size()
-         << endl;
-    cout << "Computed log-lielihood from changed genotypes: "
-         << CalcChangedGenosProb(listChangedPlaces) << endl;
-    // cout << "Minimum cost: " << CalcMaxProbUpperBound() - loglikeliBest <<
-    // endl;
-
-    string strTreeOptOut = ConvCellTreeStr(strTreeOpt);
-    cout << "Constructed single cell phylogeny: " << strTreeOptOut << endl;
-  }
-  if (fOptBrLen) {
-    string strTreeBrOpt;
-    double loglikeliBestBr = OptBranchLens(strTreeOpt, strTreeBrOpt);
-    res = loglikeliBestBr;
-    if (fOutput) {
-      cout << "**** Maximum log-likelihood (with branch length optimization): "
-           << loglikeliBestBr << endl;
-      string strTreeBrOptOut = ConvCellTreeStr(strTreeBrOpt);
-      cout << "Single cell phylogeny with branch length: " << strTreeBrOptOut
-           << endl;
+    double res = loglikeliBest;
+
+    if (fOutput)
+    {
+        cout << "**** Maximum log-likelihood: " << loglikeliBest << ", number of changed genotypes: " << listChangedPlaces.size() << endl;
+        cout << "Computed log-lielihood from changed genotypes: " << CalcChangedGenosProb(listChangedPlaces) << endl;
+        //cout << "Minimum cost: " << CalcMaxProbUpperBound() - loglikeliBest << endl;
+
+        string strTreeOptOut = ConvCellTreeStr(strTreeOpt);
+        cout << "Constructed single cell phylogeny: " << strTreeOptOut << endl;
+    }
+    if (fOptBrLen)
+    {
+        string strTreeBrOpt;
+        double loglikeliBestBr = OptBranchLens(strTreeOpt, strTreeBrOpt);
+        res = loglikeliBestBr;
+        if (fOutput)
+        {
+            cout << "**** Maximum log-likelihood (with branch length optimization): " << loglikeliBestBr << endl;
+            string strTreeBrOptOut = ConvCellTreeStr(strTreeBrOpt);
+            cout << "Single cell phylogeny with branch length: " << strTreeBrOptOut << endl;
+        }
     }
-  }
-  return res;
+    return res;
 }
 
-double ScistPerfPhyMLE ::OptBranchLens(const std::string &strTree,
-                                       std::string &strTreeBrOpt) {
-  //
-  MarginalTree treeBrOpt;
-  ReadinMarginalTreesNewickWLenString(strTree, this->genosInput.GetNumHaps(),
-                                      treeBrOpt);
-  ScistFullPerfPhyMLE sfpp(this->genosInput);
-  double res = sfpp.OptBranchLens(treeBrOpt);
-  strTreeBrOpt = treeBrOpt.GetNewickSorted(true);
-  return res;
+double ScistPerfPhyMLE ::OptBranchLens(const std::string &strTree, std::string &strTreeBrOpt)
+{
+    MarginalTree treeBrOpt;
+    ReadinMarginalTreesNewickWLenString(strTree, this->genosInput.GetNumHaps(), treeBrOpt);
+    ScistFullPerfPhyMLE sfpp(this->genosInput);
+    double res = sfpp.OptBranchLens(treeBrOpt);
+    strTreeBrOpt = treeBrOpt.GetNewickSorted(true);
+    return res;
 }
 
-void ScistPerfPhyMLE ::Init() {
-  //
-  // get all clusters
-  listClusMutsInputHetero.clear();
-  listClusMutsInputHomo.clear();
-  for (int s = 0; s < genosInput.GetNumSites(); ++s) {
-    set<int> muts;
-    genosInput.GetRowsWithGenoAtSite(s, 1, muts);
-    ScistPerfPhyCluster clus(muts);
-    listClusMutsInputHetero.push_back(clus);
-
-    set<int> muts2;
-    genosInput.GetRowsWithGenoAtSite(s, 2, muts2);
-    ScistPerfPhyCluster clus2(muts2);
-    listClusMutsInputHomo.push_back(clus2);
-  }
-
-  this->genosInput.GetColMultiplicityMap(listInputColMulti);
-
-  // construct NJ tree as the initial tree
-  string strNJ = this->genosInput.ConsNJTreeZeroRoot();
-  // cout << "Guide tree: " << strNJ << endl;
-  // string strNJ = this->genosInput.ConsNJTree();
-  // cout << "Zero-rooted initial tree: " << strNJ << endl;
-  // cout << "Genotype input: \n";
-  // this->genosInput.Dump();
-  //
-  this->treeGuide.Init(strNJ);
-
-  // set the prior score to be zero
-  listSitePriorScore.clear();
-  for (int i = 0; i < this->genosInput.GetNumSites(); ++i) {
-    double logprobInit = 0.0;
-    for (int h = 0; h < this->genosInput.GetNumHaps(); ++h) {
-      double p = this->genosInput.GetGenotypeProbAllele0At(h, i);
-      logprobInit += log(p);
+void ScistPerfPhyMLE ::Init()
+{
+    //
+    // get all clusters
+    listClusMutsInputHetero.clear();
+    listClusMutsInputHomo.clear();
+    for (int s = 0; s < genosInput.GetNumSites(); ++s)
+    {
+        set<int> muts;
+        genosInput.GetRowsWithGenoAtSite(s, 1, muts);
+        ScistPerfPhyCluster clus(muts);
+        listClusMutsInputHetero.push_back(clus);
+
+        set<int> muts2;
+        genosInput.GetRowsWithGenoAtSite(s, 2, muts2);
+        ScistPerfPhyCluster clus2(muts2);
+        listClusMutsInputHomo.push_back(clus2);
+    }
+
+    this->genosInput.GetColMultiplicityMap(listInputColMulti);
+
+    // construct NJ tree as the initial tree
+    string strNJ = this->genosInput.ConsNJTreeZeroRoot();
+
+    //
+    this->treeGuide.Init(strNJ);
+
+    // initialize each site's prior score to the summed log-probability of allele 0 over all haplotypes
+    listSitePriorScore.clear();
+    for (int i = 0; i < this->genosInput.GetNumSites(); ++i)
+    {
+        double logprobInit = 0.0;
+        for (int h = 0; h < this->genosInput.GetNumHaps(); ++h)
+        {
+            double p = this->genosInput.GetGenotypeProbAllele0At(h, i);
+            logprobInit += log(p);
+        }
+        listSitePriorScore.push_back(logprobInit);
     }
-    listSitePriorScore.push_back(logprobInit);
-  }
 }
 
-std::string ScistPerfPhyMLE ::ConsTreeFromSetClusters(
-    const std::set<ScistPerfPhyCluster> &setClusters) const {
-  // cout << "All the clusters: \n";
-  // for(set<ScistPerfPhyCluster> :: const_iterator it = setClusters.begin(); it
-  // != setClusters.end(); ++it)
-  //{
-  // it->Dump();
-  //}
-  //
-  // now construct tree
-  ScistInfPerfPhyUtils treeBuild;
-  map<int, ScistPerfPhyCluster> mapPickedClus;
-  int s = 0;
-  for (set<ScistPerfPhyCluster>::iterator it = setClusters.begin();
-       it != setClusters.end(); ++it) {
-    mapPickedClus[s] = *it;
-    ++s;
-  }
-  string strTree =
-      treeBuild.ConsTreeWCombDistClus(this->genosInput, mapPickedClus, false);
-  return strTree;
+std::string ScistPerfPhyMLE ::ConsTreeFromSetClusters(const std::set<ScistPerfPhyCluster> &setClusters) const
+{
+    //
+    // now construct tree
+    ScistInfPerfPhyUtils treeBuild;
+    map<int, ScistPerfPhyCluster> mapPickedClus;
+    int s = 0;
+    for (set<ScistPerfPhyCluster>::iterator it = setClusters.begin(); it != setClusters.end(); ++it)
+    {
+        mapPickedClus[s] = *it;
+        ++s;
+    }
+    string strTree = treeBuild.ConsTreeWCombDistClus(this->genosInput, mapPickedClus, false);
+    return strTree;
 }
 
-void ScistPerfPhyMLE ::GetNgbrTreesFrom(int numHaps, const std::string &strTree,
-                                        std::set<std::string> &setNgbrTrees) {
-  // cout << "GetNgbrTreesFrom: numHaps: " << numHaps << ", tree: " << strTree
-  // << endl;
-  //
-  setNgbrTrees.clear();
-  MarginalTree treeCurr;
-  ReadinMarginalTreesNewickWLenString(strTree, numHaps, treeCurr);
-  vector<MarginalTree> listNgbrTrees;
-  FindOneNNIMTreesFrom(treeCurr, listNgbrTrees);
-  for (int i = 0; i < (int)listNgbrTrees.size(); ++i) {
-    string strTree = listNgbrTrees[i].GetNewickSorted(false);
-    setNgbrTrees.insert(strTree);
-  }
+void ScistPerfPhyMLE ::GetNgbrTreesFrom(int numHaps, const std::string &strTree, std::set<std::string> &setNgbrTrees)
+{
+    setNgbrTrees.clear();
+    MarginalTree treeCurr;
+    ReadinMarginalTreesNewickWLenString(strTree, numHaps, treeCurr);
+    vector<MarginalTree> listNgbrTrees;
+    FindOneNNIMTreesFrom(treeCurr, listNgbrTrees);
+    for (int i = 0; i < (int)listNgbrTrees.size(); ++i)
+    {
+        string strTree = listNgbrTrees[i].GetNewickSorted(false);
+        setNgbrTrees.insert(strTree);
+    }
 }
 
-void ScistPerfPhyMLE ::GetNgbrTreesFromSPR(
-    int numHaps, const std::string &strTree,
-    std::set<std::string> &setNgbrTrees) {
-  //
-  setNgbrTrees.clear();
-  MarginalTree treeCurr;
-  ReadinMarginalTreesNewickWLenString(strTree, numHaps, treeCurr);
-  string strSelf = treeCurr.GetNewickSorted(false);
-  // cout << "strTree: " << strTree << ", strSelf: " << strSelf << endl;
-
-  // map to consecutive order as required by RBT
-  vector<int> listLeafLblsOld;
-  // treeCurr.MapLeafLblConsecutiveOrder( listLeafLblsOld );
-  treeCurr.GetLabelList(listLeafLblsOld);
-  // cout << "Mapped leaves: ";
-  // DumpIntVec(listLeafLblsOld);
-  // cout << "Changed tree: " << treeCurr.GetNewick() << endl;
-
-  // use RBT utility
-  vector<int> listLbls;
-  treeCurr.GetLabelList(listLbls);
-  // cout << "listLbss: ";
-  // DumpIntVec(listLbls);
-  vector<int> parPosList;
-  treeCurr.GetParPosInfo(parPosList);
-  // cout << "parPosList: ";
-  // DumpIntVec(parPosList);
-  vector<double> listEdgeDistOut;
-  treeCurr.GetTreeEdgeLen(listEdgeDistOut);
-  RBT treeCurrRBT(numHaps, listLbls, parPosList, listEdgeDistOut);
-  vector<RBT *> ngbrTrees;
-  treeCurrRBT.FindSPRDistOneNgbrs(ngbrTrees);
-
-  // cout << "GetNgbrTreesFromSPR: init tree: " << strTree << endl;
-  for (int i = 0; i < (int)ngbrTrees.size(); ++i) {
-    string strNW = ngbrTrees[i]->GetNewick();
-    string strNWBack = RemapLeafLbls(numHaps, strNW, listLeafLblsOld);
-    setNgbrTrees.insert(strNWBack);
-    // cout << "strNW: " << strNW  << ", SPR tree: " << strNWBack << endl;
-  }
-  // remove self
-  setNgbrTrees.erase(strSelf);
-
-  for (int i = 0; i < (int)ngbrTrees.size(); ++i) {
-    delete ngbrTrees[i];
-  }
+void ScistPerfPhyMLE ::GetNgbrTreesFromSPR(int numHaps, const std::string &strTree, std::set<std::string> &setNgbrTrees)
+{
+    //
+    setNgbrTrees.clear();
+    MarginalTree treeCurr;
+    ReadinMarginalTreesNewickWLenString(strTree, numHaps, treeCurr);
+    string strSelf = treeCurr.GetNewickSorted(false);
+    //cout << "strTree: " << strTree << ", strSelf: " << strSelf << endl;
+
+    // map to consecutive order as required by RBT
+    vector<int> listLeafLblsOld;
+    //treeCurr.MapLeafLblConsecutiveOrder( listLeafLblsOld );
+    treeCurr.GetLabelList(listLeafLblsOld);
+
+    // use RBT utility
+    vector<int> listLbls;
+    treeCurr.GetLabelList(listLbls);
+    //cout << "listLbss: ";
+    //DumpIntVec(listLbls);
+    vector<int> parPosList;
+    treeCurr.GetParPosInfo(parPosList);
+    //cout << "parPosList: ";
+    //DumpIntVec(parPosList);
+    vector<double> listEdgeDistOut;
+    treeCurr.GetTreeEdgeLen(listEdgeDistOut);
+    RBT treeCurrRBT(numHaps, listLbls, parPosList, listEdgeDistOut);
+    vector<RBT *> ngbrTrees;
+    treeCurrRBT.FindSPRDistOneNgbrs(ngbrTrees);
+
+    //cout << "GetNgbrTreesFromSPR: init tree: " << strTree << endl;
+    for (int i = 0; i < (int)ngbrTrees.size(); ++i)
+    {
+        string strNW = ngbrTrees[i]->GetNewick();
+        string strNWBack = RemapLeafLbls(numHaps, strNW, listLeafLblsOld);
+        setNgbrTrees.insert(strNWBack);
+        //cout << "strNW: " << strNW  << ", SPR tree: " << strNWBack << endl;
+    }
+    // remove self
+    setNgbrTrees.erase(strSelf);
+
+    for (int i = 0; i < (int)ngbrTrees.size(); ++i)
+    {
+        delete ngbrTrees[i];
+    }
 }
 
-std::string ScistPerfPhyMLE ::RemapLeafLbls(int numHaps,
-                                            const std::string &strTree0Based,
-                                            const vector<int> &listLblsOld) {
-  //
-  MarginalTree treeCurr;
-  ReadinMarginalTreesNewickWLenString(strTree0Based, numHaps, treeCurr);
-  map<int, int> mapLblsBack;
-  for (int i = 0; i < (int)listLblsOld.size(); ++i) {
-    mapLblsBack[i] = listLblsOld[i];
-  }
-  treeCurr.RemapLeafLabels(mapLblsBack);
-  return treeCurr.GetNewickSorted(false);
+std::string ScistPerfPhyMLE ::RemapLeafLbls(int numHaps, const std::string &strTree0Based, const vector<int> &listLblsOld)
+{
+    //
+    MarginalTree treeCurr;
+    ReadinMarginalTreesNewickWLenString(strTree0Based, numHaps, treeCurr);
+    map<int, int> mapLblsBack;
+    for (int i = 0; i < (int)listLblsOld.size(); ++i)
+    {
+        mapLblsBack[i] = listLblsOld[i];
+    }
+    treeCurr.RemapLeafLabels(mapLblsBack);
+    return treeCurr.GetNewickSorted(false);
 }
 
-std::string
-ScistPerfPhyMLE ::RemapLeafLbls(int numHaps, const std::string &strTree,
-                                const std::map<int, int> &mapLabels) {
-  //
-  MarginalTree treeCurr;
-  ReadinMarginalTreesNewickWLenString(strTree, numHaps, treeCurr);
-  treeCurr.RemapLeafLabels(mapLabels);
-  return treeCurr.GetNewickSorted(false);
+std::string ScistPerfPhyMLE ::RemapLeafLbls(int numHaps, const std::string &strTree, const std::map<int, int> &mapLabels)
+{
+    //
+    MarginalTree treeCurr;
+    ReadinMarginalTreesNewickWLenString(strTree, numHaps, treeCurr);
+    treeCurr.RemapLeafLabels(mapLabels);
+    return treeCurr.GetNewickSorted(false);
 }
 
-std::string
-ScistPerfPhyMLE ::ConvCellTreeStr(const std::string &strTree) const {
-  //
-  if (this->listCellNames.size() == 0) {
-    // no conversion if no cell names specified
-    return strTree;
-  }
-
-  TaxaMapper taxaMapper;
-  for (int i = 0; i < (int)listCellNames.size(); ++i) {
-    taxaMapper.AddTaxaStringWithId(i + 1, listCellNames[i]);
-  }
-  //
-  return taxaMapper.ConvIdStringWithOrigTaxa(strTree);
+std::string ScistPerfPhyMLE ::ConvCellTreeStr(const std::string &strTree) const
+{
+    //
+    if (this->listCellNames.size() == 0)
+    {
+        // no conversion if no cell names specified
+        return strTree;
+    }
+
+    TaxaMapper taxaMapper;
+    for (int i = 0; i < (int)listCellNames.size(); ++i)
+    {
+        taxaMapper.AddTaxaStringWithId(i + 1, listCellNames[i]);
+    }
+    //
+    return taxaMapper.ConvIdStringWithOrigTaxa(strTree);
 }
 
-std::string ScistPerfPhyMLE ::ConvMutTreeStr(const std::string &strTree) const {
-  //
-  if (this->listSiteNames.size() == 0) {
-    // no conversion if no cell names specified
-    return strTree;
-  }
-
-  TaxaMapper taxaMapper;
-  for (int i = 0; i < (int)listSiteNames.size(); ++i) {
-    taxaMapper.AddTaxaStringWithId(i + 1, listSiteNames[i]);
-  }
-  //
-  return taxaMapper.ConvIdStringWithOrigTaxa(strTree);
+std::string ScistPerfPhyMLE ::ConvMutTreeStr(const std::string &strTree) const
+{
+    //
+    if (this->listSiteNames.size() == 0)
+    {
+        // no conversion if no site names specified
+        return strTree;
+    }
+
+    TaxaMapper taxaMapper;
+    for (int i = 0; i < (int)listSiteNames.size(); ++i)
+    {
+        taxaMapper.AddTaxaStringWithId(i + 1, listSiteNames[i]);
+    }
+    //
+    return taxaMapper.ConvIdStringWithOrigTaxa(strTree);
 }
 
-void ScistPerfPhyMLE ::FindChangedGenos(
-    int siteToAdd,
-    const pair<ScistPerfPhyCluster, ScistPerfPhyCluster> &clusToAdd,
-    set<pair<pair<int, int>, int> > &listChangedPlaces) const {
-  // find list of positions where the genos are changed
-  ScistPerfPhyCluster clusInt, clusThisOnly, clusRHSOnly;
-  clusToAdd.first.IntersectWith(listClusMutsInputHetero[siteToAdd], clusInt,
-                                clusThisOnly, clusRHSOnly);
-  ScistPerfPhyCluster clusInt2, clusThisOnly2, clusRHSOnly2;
-  clusToAdd.second.IntersectWith(listClusMutsInputHomo[siteToAdd], clusInt2,
-                                 clusThisOnly2, clusRHSOnly2);
-  // get changed 0
-  set<int> setss;
-  PopulateSetWithInterval(setss, 0, this->genosInput.GetNumHaps() - 1);
-  set<int> rows0Orig;
-  this->genosInput.GetRowsWithGenoAtSite(siteToAdd, 0, rows0Orig);
-  SubtractSets(setss, rows0Orig);
-  ScistPerfPhyCluster clus0(setss);
-  clus0.SubtractFrom(clusToAdd.first);
-  clus0.SubtractFrom(clusToAdd.second);
-
-  ScistPerfPhyClusterItor itor0(clus0);
-  itor0.First();
-  while (itor0.IsDone() == false) {
-    int sc = itor0.GetCurrentSC();
-    pair<int, int> pp(sc, siteToAdd);
-    pair<pair<int, int>, int> pp0(pp, 0);
-    listChangedPlaces.insert(pp0);
-    itor0.Next();
-  }
-
-  // This only: new mutants
-  ScistPerfPhyClusterItor itor1(clusThisOnly);
-  itor1.First();
-  while (itor1.IsDone() == false) {
-    int sc = itor1.GetCurrentSC();
-    pair<int, int> pp(sc, siteToAdd);
-    pair<pair<int, int>, int> pp1(pp, 1);
-    listChangedPlaces.insert(pp1);
-    itor1.Next();
-  }
-  // RHS only: new wildtype
-  ScistPerfPhyClusterItor itor2(clusThisOnly2);
-  itor2.First();
-  while (itor2.IsDone() == false) {
-    int sc = itor2.GetCurrentSC();
-    pair<int, int> pp(sc, siteToAdd);
-    pair<pair<int, int>, int> pp2(pp, 2);
-    listChangedPlaces.insert(pp2);
-    itor2.Next();
-  }
+void ScistPerfPhyMLE ::FindChangedGenos(int siteToAdd, const pair<ScistPerfPhyCluster, ScistPerfPhyCluster> &clusToAdd, set<pair<pair<int, int>, int>> &listChangedPlaces) const
+{
+    // find list of positions where the genos are changed
+    ScistPerfPhyCluster clusInt, clusThisOnly, clusRHSOnly;
+    clusToAdd.first.IntersectWith(listClusMutsInputHetero[siteToAdd], clusInt, clusThisOnly, clusRHSOnly);
+    ScistPerfPhyCluster clusInt2, clusThisOnly2, clusRHSOnly2;
+    clusToAdd.second.IntersectWith(listClusMutsInputHomo[siteToAdd], clusInt2, clusThisOnly2, clusRHSOnly2);
+    // get changed 0
+    set<int> setss;
+    PopulateSetWithInterval(setss, 0, this->genosInput.GetNumHaps() - 1);
+    set<int> rows0Orig;
+    this->genosInput.GetRowsWithGenoAtSite(siteToAdd, 0, rows0Orig);
+    SubtractSets(setss, rows0Orig);
+    ScistPerfPhyCluster clus0(setss);
+    clus0.SubtractFrom(clusToAdd.first);
+    clus0.SubtractFrom(clusToAdd.second);
+
+    ScistPerfPhyClusterItor itor0(clus0);
+    itor0.First();
+    while (itor0.IsDone() == false)
+    {
+        int sc = itor0.GetCurrentSC();
+        pair<int, int> pp(sc, siteToAdd);
+        pair<pair<int, int>, int> pp0(pp, 0);
+        listChangedPlaces.insert(pp0);
+        itor0.Next();
+    }
+
+    // This only: new mutants
+    ScistPerfPhyClusterItor itor1(clusThisOnly);
+    itor1.First();
+    while (itor1.IsDone() == false)
+    {
+        int sc = itor1.GetCurrentSC();
+        pair<int, int> pp(sc, siteToAdd);
+        pair<pair<int, int>, int> pp1(pp, 1);
+        listChangedPlaces.insert(pp1);
+        itor1.Next();
+    }
+    // This only (homozygous cluster): cells newly set to genotype 2
+    ScistPerfPhyClusterItor itor2(clusThisOnly2);
+    itor2.First();
+    while (itor2.IsDone() == false)
+    {
+        int sc = itor2.GetCurrentSC();
+        pair<int, int> pp(sc, siteToAdd);
+        pair<pair<int, int>, int> pp2(pp, 2);
+        listChangedPlaces.insert(pp2);
+        itor2.Next();
+    }
 }
 
-double ScistPerfPhyMLE ::ScoreTree(
-    const string &strTree,
-    std::vector<std::pair<ScistPerfPhyCluster, ScistPerfPhyCluster> >
-        &listChangedCluster) const {
-  // cout << "ScoreTree: tree: " << strTree << endl;
-  // score the current tree
-  MarginalTree treeToScore;
-  ReadinMarginalTreesNewickWLenString(strTree, this->genosInput.GetNumHaps(),
-                                      treeToScore);
-  // cout << "Score tree: " << treeToScore.GetNewick() << endl;
-  set<pair<ScistPerfPhyCluster, ScistPerfPhyCluster> > setClusDone;
-  map<pair<ScistPerfPhyCluster, ScistPerfPhyCluster>,
-      pair<ScistPerfPhyCluster, ScistPerfPhyCluster> >
-      mapChangedClus;
-  double res = 0.0;
-  ScistPerfPhyProbOnTree probTree(this->genosInput, treeToScore);
-
-  for (int site = 0; site < genosInput.GetNumSites(); ++site) {
-    // cout << "ScoreTree: site " << site << " multi:" <<
-    // this->listInputColMulti[site] << endl; cout << "Heterozygote clus: ";
-    // listClusMutsInputHetero[site].Dump();
-    // cout << "Homozygous clus: ";
-    // listClusMutsInputHomo[site].Dump();
-    pair<ScistPerfPhyCluster, ScistPerfPhyCluster> pp0(
-        listClusMutsInputHetero[site], listClusMutsInputHomo[site]);
-    if (setClusDone.find(pp0) != setClusDone.end()) {
-      listChangedCluster.push_back(mapChangedClus[pp0]);
-      continue;
+double ScistPerfPhyMLE ::ScoreTree(const string &strTree, std::vector<std::pair<ScistPerfPhyCluster, ScistPerfPhyCluster>> &listChangedCluster) const
+{
+    //cout << "ScoreTree: tree: " << strTree << endl;
+    // score the current tree
+    MarginalTree treeToScore;
+    ReadinMarginalTreesNewickWLenString(strTree, this->genosInput.GetNumHaps(), treeToScore);
+    //cout << "Score tree: " << treeToScore.GetNewick() << endl;
+    set<pair<ScistPerfPhyCluster, ScistPerfPhyCluster>> setClusDone;
+    map<pair<ScistPerfPhyCluster, ScistPerfPhyCluster>, pair<ScistPerfPhyCluster, ScistPerfPhyCluster>> mapChangedClus;
+    double res = 0.0;
+    ScistPerfPhyProbOnTree probTree(this->genosInput, treeToScore);
+
+    for (int site = 0; site < genosInput.GetNumSites(); ++site)
+    {
+
+        pair<ScistPerfPhyCluster, ScistPerfPhyCluster> pp0(listClusMutsInputHetero[site], listClusMutsInputHomo[site]);
+        if (setClusDone.find(pp0) != setClusDone.end())
+        {
+            listChangedCluster.push_back(mapChangedClus[pp0]);
+            continue;
+        }
+        int multi = this->listInputColMulti[site];
+        pair<ScistPerfPhyCluster, ScistPerfPhyCluster> clusChanged;
+        double loglikeliSite = ScoreTreeWithSite(probTree, treeToScore, site, clusChanged.first, clusChanged.second);
+        mapChangedClus[pp0] = clusChanged;
+        listChangedCluster.push_back(clusChanged);
+        res += loglikeliSite * multi;
+        setClusDone.insert(pp0);
     }
-    int multi = this->listInputColMulti[site];
-    pair<ScistPerfPhyCluster, ScistPerfPhyCluster> clusChanged;
-    double loglikeliSite = ScoreTreeWithSite(
-        probTree, treeToScore, site, clusChanged.first, clusChanged.second);
-    mapChangedClus[pp0] = clusChanged;
-    listChangedCluster.push_back(clusChanged);
-    res += loglikeliSite * multi;
-    setClusDone.insert(pp0);
-    // cout << "site prob: " << loglikeliSite << ": clusChanged: ";
-    // clusChanged.first.Dump();
-    // cout << "  and ";
-    // clusChanged.second.Dump();
-  }
-
-  return res;
+
+    return res;
 }
 
-double
-ScistPerfPhyMLE ::ScoreTreeWithSite(ScistPerfPhyProbOnTree &probTree,
-                                    MarginalTree &tree, int site,
-                                    ScistPerfPhyCluster &clusChanged1,
-                                    ScistPerfPhyCluster &clusChanged2) const {
-  // cout << "site: " << site << ", tree: " << tree.GetNewickSorted(false) <<
-  // endl;
-  return probTree.CalcProbMaxForSite(site, clusChanged1, clusChanged2);
+double ScistPerfPhyMLE ::ScoreTreeWithSite(ScistPerfPhyProbOnTree &probTree, MarginalTree &tree, int site, ScistPerfPhyCluster &clusChanged1, ScistPerfPhyCluster &clusChanged2) const
+{
+    //cout << "site: " << site << ", tree: " << tree.GetNewickSorted(false) << endl;
+    return probTree.CalcProbMaxForSite(site, clusChanged1, clusChanged2);
 }
 
-double ScistPerfPhyMLE ::CalcMaxProbUpperBound() const {
-  //
-  double res = 0.0;
-  for (int s = 0; s < this->genosInput.GetNumSites(); ++s) {
-    for (int h = 0; h < this->genosInput.GetNumHaps(); ++h) {
-      double p0 = this->genosInput.GetGenotypeProbAllele0At(h, s);
-      double p1 = 1 - p0;
-      if (p0 >= p1) {
-        res += log(p0);
-      } else {
-        res += log(p1);
-      }
+double ScistPerfPhyMLE ::CalcMaxProbUpperBound() const
+{
+    //
+    double res = 0.0;
+    for (int s = 0; s < this->genosInput.GetNumSites(); ++s)
+    {
+        for (int h = 0; h < this->genosInput.GetNumHaps(); ++h)
+        {
+            double p0 = this->genosInput.GetGenotypeProbAllele0At(h, s);
+            double p1 = 1 - p0;
+            if (p0 >= p1)
+            {
+                res += log(p0);
+            }
+            else
+            {
+                res += log(p1);
+            }
+        }
     }
-  }
-  return res;
+    return res;
 }
 
-double ScistPerfPhyMLE ::CalcChangedGenosProb(
-    const std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces)
-    const {
-  //
-  double res = 0.0;
-  map<pair<int, int>, int> mapChangedPlaces;
-  for (std::set<std::pair<std::pair<int, int>, int> >::const_iterator it =
-           listChangedPlaces.begin();
-       it != listChangedPlaces.end(); ++it) {
-    mapChangedPlaces[it->first] = it->second;
-  }
-
-  for (int s = 0; s < this->genosInput.GetNumSites(); ++s) {
-    for (int h = 0; h < this->genosInput.GetNumHaps(); ++h) {
-      pair<int, int> pp(h, s);
-      int allele = this->genosInput.GetGenotypeAt(h, s);
-      std::map<std::pair<int, int>, int>::const_iterator it =
-          mapChangedPlaces.find(pp);
-      if (it != mapChangedPlaces.end()) {
-        int alleleAlt = it->second;
-        YW_ASSERT_INFO(allele == alleleAlt, "Wrong");
-        allele = alleleAlt;
-      }
-
-      double p0 = this->genosInput.GetGenotypeProbAllele0At(h, s);
-      double p1 = 1 - p0;
-      if (allele == 0) {
-        res += log(p0);
-      } else {
-        res += log(p1);
-      }
+double ScistPerfPhyMLE ::CalcChangedGenosProb(const std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces) const
+{
+    //
+    double res = 0.0;
+    map<pair<int, int>, int> mapChangedPlaces;
+    for (std::set<std::pair<std::pair<int, int>, int>>::const_iterator it = listChangedPlaces.begin(); it != listChangedPlaces.end(); ++it)
+    {
+        mapChangedPlaces[it->first] = it->second;
+    }
+
+    for (int s = 0; s < this->genosInput.GetNumSites(); ++s)
+    {
+        for (int h = 0; h < this->genosInput.GetNumHaps(); ++h)
+        {
+            pair<int, int> pp(h, s);
+            int allele = this->genosInput.GetGenotypeAt(h, s);
+            std::map<std::pair<int, int>, int>::const_iterator it = mapChangedPlaces.find(pp);
+            if (it != mapChangedPlaces.end())
+            {
+                int alleleAlt = it->second;
+                YW_ASSERT_INFO(allele == alleleAlt, "Wrong");
+                allele = alleleAlt;
+            }
+
+            double p0 = this->genosInput.GetGenotypeProbAllele0At(h, s);
+            double p1 = 1 - p0;
+            if (allele == 0)
+            {
+                res += log(p0);
+            }
+            else
+            {
+                res += log(p1);
+            }
+        }
     }
-  }
 
-  return res;
+    return res;
 }
 
 // *************************************************************************************
 // Tree probability
 
-ScistPerfPhyProbOnTree ::ScistPerfPhyProbOnTree(ScistGenGenotypeMat &genos,
-                                                MarginalTree &mtreeIn)
-    : genosInput(genos), mtree(mtreeIn) {
-  // set the prior score to be zero
-  listSitePriorScore.clear();
-  for (int i = 0; i < this->genosInput.GetNumSites(); ++i) {
-    double logprobInit = 0.0;
-    for (int h = 0; h < this->genosInput.GetNumHaps(); ++h) {
-      double p = this->genosInput.GetGenotypeProbAt(h, i, 0);
-      logprobInit += log(p);
+ScistPerfPhyProbOnTree ::ScistPerfPhyProbOnTree(ScistGenGenotypeMat &genos, MarginalTree &mtreeIn) : genosInput(genos), mtree(mtreeIn)
+{
+    // initialize each site's prior score to the summed log-probability of genotype 0 over all haplotypes
+    listSitePriorScore.clear();
+    for (int i = 0; i < this->genosInput.GetNumSites(); ++i)
+    {
+        double logprobInit = 0.0;
+        for (int h = 0; h < this->genosInput.GetNumHaps(); ++h)
+        {
+            double p = this->genosInput.GetGenotypeProbAt(h, i, 0);
+            logprobInit += log(p);
+        }
+        listSitePriorScore.push_back(logprobInit);
     }
-    listSitePriorScore.push_back(logprobInit);
-  }
-  Init();
+    Init();
 }
 
-void ScistPerfPhyProbOnTree ::Init() {
-  //
-  ScistTernaryMat *pGenoMat =
-      dynamic_cast<ScistTernaryMat *>(&this->genosInput);
-  if (pGenoMat == NULL) {
-    return; // only work with genotype data
-  }
-  this->genosInputHap.SetSize(this->genosInput.GetNumHaps(),
-                              this->genosInput.GetNumSites() * 2);
-  for (int h = 0; h < this->genosInput.GetNumHaps(); ++h) {
-    for (int s = 0; s < this->genosInput.GetNumSites(); ++s) {
-      double p0 = pGenoMat->GetGenotypeProbAt(h, s, 0);
-      double p1 = pGenoMat->GetGenotypeProbAt(h, s, 1);
-      double p2 = pGenoMat->GetGenotypeProbAt(h, s, 2);
-      double p12 = p1 + p2;
-      double p01 = p0 + p1;
-      int allele0 = 0;
-      if (p0 < p12) {
-        allele0 = 1;
-      }
-      this->genosInputHap.SetGenotypeAt(h, 2 * s, allele0);
-      this->genosInputHap.SetGenotypeProbAt(h, 2 * s, p0);
-      int allele1 = 0;
-      if (p01 < p2) {
-        allele1 = 1;
-      }
-      this->genosInputHap.SetGenotypeAt(h, 2 * s + 1, allele1);
-      this->genosInputHap.SetGenotypeProbAt(h, 2 * s + 1, p01);
+void ScistPerfPhyProbOnTree ::Init()
+{
+    //
+    ScistTernaryMat *pGenoMat = dynamic_cast<ScistTernaryMat *>(&this->genosInput);
+    if (pGenoMat == NULL)
+    {
+        return; // only work with genotype data
+    }
+    this->genosInputHap.SetSize(this->genosInput.GetNumHaps(), this->genosInput.GetNumSites() * 2);
+    for (int h = 0; h < this->genosInput.GetNumHaps(); ++h)
+    {
+        for (int s = 0; s < this->genosInput.GetNumSites(); ++s)
+        {
+            double p0 = pGenoMat->GetGenotypeProbAt(h, s, 0);
+            double p1 = pGenoMat->GetGenotypeProbAt(h, s, 1);
+            double p2 = pGenoMat->GetGenotypeProbAt(h, s, 2);
+            double p12 = p1 + p2;
+            double p01 = p0 + p1;
+            int allele0 = 0;
+            if (p0 < p12)
+            {
+                allele0 = 1;
+            }
+            this->genosInputHap.SetGenotypeAt(h, 2 * s, allele0);
+            this->genosInputHap.SetGenotypeProbAt(h, 2 * s, p0);
+            int allele1 = 0;
+            if (p01 < p2)
+            {
+                allele1 = 1;
+            }
+            this->genosInputHap.SetGenotypeAt(h, 2 * s + 1, allele1);
+            this->genosInputHap.SetGenotypeProbAt(h, 2 * s + 1, p01);
+        }
     }
-  }
 }
 
-double ScistPerfPhyProbOnTree ::CalcProbMaxForSite(
-    int site, ScistPerfPhyCluster &clusChangedMut,
-    ScistPerfPhyCluster &clusChangedHomoMut) const {
-  ScistHaplotypeMat *pHapMat =
-      dynamic_cast<ScistHaplotypeMat *>(&this->genosInput);
-
-  if (pHapMat != NULL) {
-    clusChangedHomoMut.Clear();
-    return CalcProbMaxForSiteHap(site, clusChangedMut);
-  } else // right now, must be of genotype
-  {
-    return CalcProbMaxForSiteGeno(site, clusChangedMut, clusChangedHomoMut);
-  }
-}
+double ScistPerfPhyProbOnTree ::CalcProbMaxForSite(int site, ScistPerfPhyCluster &clusChangedMut, ScistPerfPhyCluster &clusChangedHomoMut) const
+{
+    ScistHaplotypeMat *pHapMat = dynamic_cast<ScistHaplotypeMat *>(&this->genosInput);
 
-double ScistPerfPhyProbOnTree ::CalcProbMaxForSiteHap(
-    int site, ScistPerfPhyCluster &clusChanged) const {
-  // cout << "ScoreTreeWithSite: tree: " << tree.GetNewick() << ", site: " <<
-  // site << endl;
-  // score the site wrt the tree (i.e. find the best split of the tree for this
-  // site)
-  double res = -1.0 * HAP_MAX_INT;
-  // do a bottom up
-  vector<double> listNodeSplitProb;
-  // init to be bad
-  for (int node = 0; node < mtree.GetTotNodesNum(); ++node) {
-    listNodeSplitProb.push_back(-1.0 * HAP_MAX_INT);
-  }
-
-  // cout << "CalcProbMaxForSiteHap: mtree: " << mtree.GetNewickSorted(false) <<
-  // endl; mtree.Dump();
-
-  int nodeOpt = -1;
-  for (int node = 0; node < mtree.GetTotNodesNum(); ++node) {
-    // cout << "node " << node << endl;
-    if (node == mtree.GetRoot()) {
-      // continue;
+    if (pHapMat != NULL)
+    {
+        clusChangedHomoMut.Clear();
+        return CalcProbMaxForSiteHap(site, clusChangedMut);
     }
-    double logpStep;
-    if (mtree.IsLeaf(node)) {
-      // a single leaf in the split
-      int lvlbl = mtree.GetLabel(node) - 1;
-      // cout << "Leaf: " << lvlbl << endl;
-      double p0 = this->genosInput.GetGenotypeProbAllele0At(lvlbl, site);
-      if (p0 < YW_VERY_SMALL_FRACTION) {
-        p0 = YW_VERY_SMALL_FRACTION;
-      } else if (p0 > 1.0 - YW_VERY_SMALL_FRACTION) {
-        p0 = 1.0 - YW_VERY_SMALL_FRACTION;
-      }
-      logpStep = log((1 - p0) / p0);
-      // cout << "Set leaf " << node << " log prob to: " << logpStep << ", p0="
-      // << p0 << endl;
-    } else {
-      // get the two children and add them up
-      int childLeft = mtree.GetLeftDescendant(node);
-      int childRight = mtree.GetRightDescendant(node);
-      // cout << "node: " << node << ", childLeft: " << childLeft << ",
-      // childRight: " << childRight << endl; cout << "childLeft: " << childLeft
-      // << ", right: " << childRight << endl;
-
-      YW_ASSERT_INFO(listNodeSplitProb[childLeft] > -1.0 * HAP_MAX_INT,
-                     "Bad left");
-      YW_ASSERT_INFO(listNodeSplitProb[childRight] > -1.0 * HAP_MAX_INT,
-                     "Bad right1");
-      logpStep = listNodeSplitProb[childLeft] + listNodeSplitProb[childRight];
+    else // right now, must be of genotype
+    {
+        return CalcProbMaxForSiteGeno(site, clusChangedMut, clusChangedHomoMut);
     }
-    // cout << "log prob: " << logpStep << " for node: " << node << endl;
-    listNodeSplitProb[node] = logpStep;
-    if (logpStep > res) {
-      // cout << "Better at node: " << node << endl;
-      res = logpStep;
-      nodeOpt = node;
+}
+
+double ScistPerfPhyProbOnTree ::CalcProbMaxForSiteHap(int site, ScistPerfPhyCluster &clusChanged) const
+{
+    //cout << "ScoreTreeWithSite: tree: " << tree.GetNewick() << ", site: " << site << endl;
+    // score the site wrt the tree (i.e. find the best split of the tree for this site)
+    double res = -1.0 * HAP_MAX_INT;
+    // do a bottom up
+    vector<double> listNodeSplitProb;
+    // init to be bad
+    for (int node = 0; node < mtree.GetTotNodesNum(); ++node)
+    {
+        listNodeSplitProb.push_back(-1.0 * HAP_MAX_INT);
     }
-  }
 
-  set<int> nodeOptSplitLbls;
+    //cout << "CalcProbMaxForSiteHap: mtree: " << mtree.GetNewickSorted(false) << endl;
+    //mtree.Dump();
+
+    int nodeOpt = -1;
+    for (int node = 0; node < mtree.GetTotNodesNum(); ++node)
+    {
+        //cout << "node " << node << endl;
+        if (node == mtree.GetRoot())
+        {
+            //continue;
+        }
+        double logpStep;
+        if (mtree.IsLeaf(node))
+        {
+            // a single leaf in the split
+            int lvlbl = mtree.GetLabel(node) - 1;
+            //cout << "Leaf: " << lvlbl << endl;
+            double p0 = this->genosInput.GetGenotypeProbAllele0At(lvlbl, site);
+            if (p0 < YW_VERY_SMALL_FRACTION)
+            {
+                p0 = YW_VERY_SMALL_FRACTION;
+            }
+            else if (p0 > 1.0 - YW_VERY_SMALL_FRACTION)
+            {
+                p0 = 1.0 - YW_VERY_SMALL_FRACTION;
+            }
+            logpStep = log((1 - p0) / p0);
+            //cout << "Set leaf " << node << " log prob to: " << logpStep << ", p0=" << p0 << endl;
+        }
+        else
+        {
+            // get the two children and add them up
+            int childLeft = mtree.GetLeftDescendant(node);
+            int childRight = mtree.GetRightDescendant(node);
+            //cout << "node: " << node << ", childLeft: " << childLeft << ", childRight: " << childRight << endl;
+            //cout << "childLeft: " << childLeft << ", right: " << childRight << endl;
+
+            YW_ASSERT_INFO(listNodeSplitProb[childLeft] > -1.0 * HAP_MAX_INT, "Bad left");
+            YW_ASSERT_INFO(listNodeSplitProb[childRight] > -1.0 * HAP_MAX_INT, "Bad right1");
+            logpStep = listNodeSplitProb[childLeft] + listNodeSplitProb[childRight];
+        }
+        //cout << "log prob: " << logpStep << " for node: " << node << endl;
+        listNodeSplitProb[node] = logpStep;
+        if (logpStep > res)
+        {
+            //cout << "Better at node: " << node << endl;
+            res = logpStep;
+            nodeOpt = node;
+        }
+    }
 
-  // if nothing is good, just take all-0
-  if (res < 0.0) {
-    //
-    res = 0;
-    nodeOpt = -1;
-  } else {
-    YW_ASSERT_INFO(nodeOpt >= 0, "Node not found");
-    set<int> nodeOptSplit;
-    mtree.GetLeavesUnder(nodeOpt, nodeOptSplit);
-    mtree.GetlabelsFor(nodeOptSplit, nodeOptSplitLbls);
-    DecAllNumInSet(nodeOptSplitLbls);
-  }
-  ScistPerfPhyCluster clus(nodeOptSplitLbls);
-  clusChanged = clus;
-  // cout << "Max prob at this site: " << res + this->listSitePriorScore[site]
-  // << " at site " << nodeOpt << endl; cout << "clust changed: ";
-  // clusChanged.Dump();
-  return res + this->listSitePriorScore[site];
-}
+    set<int> nodeOptSplitLbls;
 
-double ScistPerfPhyProbOnTree ::CalcProbMaxForSiteGeno(
-    int site, ScistPerfPhyCluster &clusChangedHetero,
-    ScistPerfPhyCluster &clusChangedHomo) const {
-  //
-  set<int> setSC0, setSC1, setSC2;
-  this->genosInput.GetRowsWithGenoAtSite(site, 0, setSC0);
-  this->genosInput.GetRowsWithGenoAtSite(site, 1, setSC1);
-  this->genosInput.GetRowsWithGenoAtSite(site, 2, setSC2);
-
-  // first accumulate for each node, the sum of diff p1/p0
-  vector<double> vecSumDiffP10, vecSumDiffP21;
-  vector<double> vecMaxSumDiff21;
-  vector<int> vecMaxSumDiff21Node;
-  for (int node = 0; node < mtree.GetTotNodesNum(); ++node) {
-    double logpStep, logpStep2;
-    if (mtree.IsLeaf(node)) {
-      // a single leaf in the split
-      int lvlbl = mtree.GetLabel(node) - 1;
-      // cout << "Leaf: " << lvlbl << endl;
-      double p0 = this->genosInput.GetGenotypeProbAt(lvlbl, site, 0);
-      double p1 = this->genosInput.GetGenotypeProbAt(lvlbl, site, 1);
-      double p2 = this->genosInput.GetGenotypeProbAt(lvlbl, site, 2);
-      logpStep = log(p1 / p0);
-      logpStep2 = log(p2 / p1);
-      vecMaxSumDiff21.push_back(logpStep2);
-      vecMaxSumDiff21Node.push_back(node);
-    } else {
-      // get the two children and add them up
-      int childLeft = mtree.GetLeftDescendant(node);
-      int childRight = mtree.GetRightDescendant(node);
-      // cout << "childLeft: " << childLeft << ", right: " << childRight <<
-      // endl;
-
-      YW_ASSERT_INFO(vecSumDiffP10[childLeft] > -1.0 * HAP_MAX_INT,
-                     "Bad left (geno)");
-      YW_ASSERT_INFO(vecSumDiffP10[childRight] > -1.0 * HAP_MAX_INT,
-                     "Bad right2");
-      logpStep = vecSumDiffP10[childLeft] + vecSumDiffP10[childRight];
-      logpStep2 = vecSumDiffP21[childLeft] + vecSumDiffP21[childRight];
-
-      double maxSumLogp21 = logpStep2;
-      int nodeMax = node;
-      if (vecSumDiffP21[childLeft] > maxSumLogp21) {
-        maxSumLogp21 = vecSumDiffP21[childLeft];
-        nodeMax = vecMaxSumDiff21Node[childLeft];
-      }
-      if (vecSumDiffP21[childRight] > maxSumLogp21) {
-        maxSumLogp21 = vecSumDiffP21[childRight];
-        nodeMax = vecMaxSumDiff21Node[childRight];
-      }
-      vecMaxSumDiff21.push_back(maxSumLogp21);
-      vecMaxSumDiff21Node.push_back(nodeMax);
+    // if nothing is good, just take all-0
+    if (res < 0.0)
+    {
+        //
+        res = 0;
+        nodeOpt = -1;
     }
-    // cout << "log prob: " << logpStep << endl;
-    vecSumDiffP10.push_back(logpStep);
-    vecSumDiffP21.push_back(logpStep2);
-  }
-
-  // do another scan to find the best
-  double res = -1.0 * HAP_MAX_INT;
-  int node1 = -1, node2 = -1;
-  for (int node = 0; node < mtree.GetTotNodesNum(); ++node) {
-    double p2Part = 0.0;
-    double node2MaxUse = -1;
-    if (vecMaxSumDiff21[node] > 0.0) {
-      p2Part = vecMaxSumDiff21[node];
-      node2MaxUse = vecMaxSumDiff21Node[node];
+    else
+    {
+        YW_ASSERT_INFO(nodeOpt >= 0, "Node not found");
+        set<int> nodeOptSplit;
+        mtree.GetLeavesUnder(nodeOpt, nodeOptSplit);
+        mtree.GetlabelsFor(nodeOptSplit, nodeOptSplitLbls);
+        DecAllNumInSet(nodeOptSplitLbls);
     }
-    if (vecSumDiffP10[node] + p2Part > res) {
-      res = vecSumDiffP10[node] + p2Part;
+    ScistPerfPhyCluster clus(nodeOptSplitLbls);
+    clusChanged = clus;
+    //cout << "Max prob at this site: " << res + this->listSitePriorScore[site] << " at site " << nodeOpt << endl;
+    //cout << "clust changed: ";
+    //clusChanged.Dump();
+    return res + this->listSitePriorScore[site];
+}
 
-      node1 = node;
-      node2 = node2MaxUse;
+double ScistPerfPhyProbOnTree ::CalcProbMaxForSiteGeno(int site, ScistPerfPhyCluster &clusChangedHetero, ScistPerfPhyCluster &clusChangedHomo) const
+{
+    //
+    set<int> setSC0, setSC1, setSC2;
+    this->genosInput.GetRowsWithGenoAtSite(site, 0, setSC0);
+    this->genosInput.GetRowsWithGenoAtSite(site, 1, setSC1);
+    this->genosInput.GetRowsWithGenoAtSite(site, 2, setSC2);
+
+    // first accumulate for each node, the sum of diff p1/p0
+    vector<double> vecSumDiffP10, vecSumDiffP21;
+    vector<double> vecMaxSumDiff21;
+    vector<int> vecMaxSumDiff21Node;
+    for (int node = 0; node < mtree.GetTotNodesNum(); ++node)
+    {
+        double logpStep, logpStep2;
+        if (mtree.IsLeaf(node))
+        {
+            // a single leaf in the split
+            int lvlbl = mtree.GetLabel(node) - 1;
+            //cout << "Leaf: " << lvlbl << endl;
+            double p0 = this->genosInput.GetGenotypeProbAt(lvlbl, site, 0);
+            double p1 = this->genosInput.GetGenotypeProbAt(lvlbl, site, 1);
+            double p2 = this->genosInput.GetGenotypeProbAt(lvlbl, site, 2);
+            logpStep = log(p1 / p0);
+            logpStep2 = log(p2 / p1);
+            vecMaxSumDiff21.push_back(logpStep2);
+            vecMaxSumDiff21Node.push_back(node);
+        }
+        else
+        {
+            // get the two children and add them up
+            int childLeft = mtree.GetLeftDescendant(node);
+            int childRight = mtree.GetRightDescendant(node);
+            //cout << "childLeft: " << childLeft << ", right: " << childRight << endl;
+
+            YW_ASSERT_INFO(vecSumDiffP10[childLeft] > -1.0 * HAP_MAX_INT, "Bad left (geno)");
+            YW_ASSERT_INFO(vecSumDiffP10[childRight] > -1.0 * HAP_MAX_INT, "Bad right2");
+            logpStep = vecSumDiffP10[childLeft] + vecSumDiffP10[childRight];
+            logpStep2 = vecSumDiffP21[childLeft] + vecSumDiffP21[childRight];
+
+            double maxSumLogp21 = logpStep2;
+            int nodeMax = node;
+            if (vecSumDiffP21[childLeft] > maxSumLogp21)
+            {
+                maxSumLogp21 = vecSumDiffP21[childLeft];
+                nodeMax = vecMaxSumDiff21Node[childLeft];
+            }
+            if (vecSumDiffP21[childRight] > maxSumLogp21)
+            {
+                maxSumLogp21 = vecSumDiffP21[childRight];
+                nodeMax = vecMaxSumDiff21Node[childRight];
+            }
+            vecMaxSumDiff21.push_back(maxSumLogp21);
+            vecMaxSumDiff21Node.push_back(nodeMax);
+        }
+        //cout << "log prob: " << logpStep << endl;
+        vecSumDiffP10.push_back(logpStep);
+        vecSumDiffP21.push_back(logpStep2);
     }
-  }
 
-  // figure out the genos
-  set<int> dummy;
-  ScistPerfPhyCluster clusDummy(dummy);
-  if (res < 0.0) {
-    //
-    clusChangedHetero = clusDummy;
-    clusChangedHomo = clusDummy;
-  } else {
-    YW_ASSERT_INFO(node1 >= 0, "Wrong");
-    set<int> nodeOptSplit, nodeOptSplitLbls;
-    mtree.GetLeavesUnder(node1, nodeOptSplit);
-    mtree.GetlabelsFor(nodeOptSplit, nodeOptSplitLbls);
-    DecAllNumInSet(nodeOptSplitLbls);
-    set<int> nodeOptSplitLbls2;
-    if (node2 >= 0) {
-      set<int> nodeOptSplit2;
-      mtree.GetLeavesUnder(node2, nodeOptSplit2);
-      mtree.GetlabelsFor(nodeOptSplit2, nodeOptSplitLbls2);
-      DecAllNumInSet(nodeOptSplitLbls2);
+    // do another scan to find the best
+    double res = -1.0 * HAP_MAX_INT;
+    int node1 = -1, node2 = -1;
+    for (int node = 0; node < mtree.GetTotNodesNum(); ++node)
+    {
+        double p2Part = 0.0;
+        double node2MaxUse = -1;
+        if (vecMaxSumDiff21[node] > 0.0)
+        {
+            p2Part = vecMaxSumDiff21[node];
+            node2MaxUse = vecMaxSumDiff21Node[node];
+        }
+        if (vecSumDiffP10[node] + p2Part > res)
+        {
+            res = vecSumDiffP10[node] + p2Part;
+
+            node1 = node;
+            node2 = node2MaxUse;
+        }
     }
-    SubtractSets(nodeOptSplitLbls, nodeOptSplitLbls2);
 
-    ScistPerfPhyCluster clus1(nodeOptSplitLbls);
-    clusChangedHetero = clus1;
-    ScistPerfPhyCluster clus2(nodeOptSplitLbls2);
-    clusChangedHomo = clus2;
-  }
+    // figure out the genos
+    set<int> dummy;
+    ScistPerfPhyCluster clusDummy(dummy);
+    if (res < 0.0)
+    {
+        //
+        clusChangedHetero = clusDummy;
+        clusChangedHomo = clusDummy;
+    }
+    else
+    {
+        YW_ASSERT_INFO(node1 >= 0, "Wrong");
+        set<int> nodeOptSplit, nodeOptSplitLbls;
+        mtree.GetLeavesUnder(node1, nodeOptSplit);
+        mtree.GetlabelsFor(nodeOptSplit, nodeOptSplitLbls);
+        DecAllNumInSet(nodeOptSplitLbls);
+        set<int> nodeOptSplitLbls2;
+        if (node2 >= 0)
+        {
+            set<int> nodeOptSplit2;
+            mtree.GetLeavesUnder(node2, nodeOptSplit2);
+            mtree.GetlabelsFor(nodeOptSplit2, nodeOptSplitLbls2);
+            DecAllNumInSet(nodeOptSplitLbls2);
+        }
+        SubtractSets(nodeOptSplitLbls, nodeOptSplitLbls2);
+
+        ScistPerfPhyCluster clus1(nodeOptSplitLbls);
+        clusChangedHetero = clus1;
+        ScistPerfPhyCluster clus2(nodeOptSplitLbls2);
+        clusChangedHomo = clus2;
+    }
 
-  return res + this->listSitePriorScore[site];
+    return res + this->listSitePriorScore[site];
 }
 
-double ScistPerfPhyProbOnTree ::CalcProbForSite(
-    int site, double totEdgeLen, const vector<set<int> > &listClades) const {
-  ScistHaplotypeMat *pHapMat =
-      dynamic_cast<ScistHaplotypeMat *>(&this->genosInput);
-
-  if (pHapMat != NULL) {
-    return CalcProbForSiteHap(site, totEdgeLen, listClades);
-  } else // right now, must be of genotype
-  {
-    return CalcProbForSiteGeno(site, totEdgeLen, listClades);
-  }
+double ScistPerfPhyProbOnTree ::CalcProbForSite(int site, double totEdgeLen, const vector<set<int>> &listClades) const
+{
+    ScistHaplotypeMat *pHapMat = dynamic_cast<ScistHaplotypeMat *>(&this->genosInput);
+
+    if (pHapMat != NULL)
+    {
+        return CalcProbForSiteHap(site, totEdgeLen, listClades);
+    }
+    else // right now, must be of genotype
+    {
+        return CalcProbForSiteGeno(site, totEdgeLen, listClades);
+    }
 }
 
-double ScistPerfPhyProbOnTree ::CalcProbForSiteHap(
-    int site, double totEdgeLen, const vector<set<int> > &listClades) const {
-  vector<double> listCladeProb;
-  for (int i = 0; i < mtree.GetTotNodesNum(); ++i) {
-    listCladeProb.push_back(-1.0 * HAP_MAX_INT);
-  }
-
-  // get the sum of prob0
-  double sumProb0 = 0.0;
-  for (int h = 0; h < this->genosInput.GetNumHaps(); ++h) {
-    sumProb0 += log(this->genosInput.GetGenotypeProbAllele0At(h, site));
-  }
-
-  double loglikeTot = -1.0 * HAP_MAX_INT;
-  for (int i = 0; i < mtree.GetTotNodesNum(); ++i) {
-    if (i == mtree.GetRoot()) {
-      continue;
+double ScistPerfPhyProbOnTree ::CalcProbForSiteHap(int site, double totEdgeLen, const vector<set<int>> &listClades) const
+{
+    vector<double> listCladeProb;
+    for (int i = 0; i < mtree.GetTotNodesNum(); ++i)
+    {
+        listCladeProb.push_back(-1.0 * HAP_MAX_INT);
     }
-    double brLen = mtree.GetEdgeLen(i);
-    double probPrior = brLen / totEdgeLen;
-    double probCladeOnly = 0.0;
-    if (mtree.IsLeaf(i)) {
-      int lbl = *listClades[i].begin();
-      double p0 = this->genosInput.GetGenotypeProbAllele0At(lbl, site);
-      double p1 = 1 - p0;
-      double pr = log(p1 / p0);
-      probCladeOnly = pr;
-    } else {
-      int childLeft = mtree.GetLeftDescendant(i);
-      int childRight = mtree.GetRightDescendant(i);
-      probCladeOnly = listCladeProb[childLeft] + listCladeProb[childRight];
+
+    // get the sum of prob0
+    double sumProb0 = 0.0;
+    for (int h = 0; h < this->genosInput.GetNumHaps(); ++h)
+    {
+        sumProb0 += log(this->genosInput.GetGenotypeProbAllele0At(h, site));
     }
-    // cout << "probPrior: " << probPrior << endl;
-    listCladeProb[i] = probCladeOnly + log(probPrior);
-    // double probClade = CalcProbMutClade(site, listClades[i] );
-    // YW: need to check this
-    // loglikeTot = GetLogSumOfTwo(loglikeTot, log(probPrior) + probCladeOnly);
-    if (loglikeTot < listCladeProb[i]) {
-      loglikeTot = listCladeProb[i];
+
+    double loglikeTot = -1.0 * HAP_MAX_INT;
+    for (int i = 0; i < mtree.GetTotNodesNum(); ++i)
+    {
+        if (i == mtree.GetRoot())
+        {
+            continue;
+        }
+        double brLen = mtree.GetEdgeLen(i);
+        double probPrior = brLen / totEdgeLen;
+        double probCladeOnly = 0.0;
+        if (mtree.IsLeaf(i))
+        {
+            int lbl = *listClades[i].begin();
+            double p0 = this->genosInput.GetGenotypeProbAllele0At(lbl, site);
+            double p1 = 1 - p0;
+            double pr = log(p1 / p0);
+            probCladeOnly = pr;
+        }
+        else
+        {
+            int childLeft = mtree.GetLeftDescendant(i);
+            int childRight = mtree.GetRightDescendant(i);
+            probCladeOnly = listCladeProb[childLeft] + listCladeProb[childRight];
+        }
+        //cout << "probPrior: " << probPrior << endl;
+        listCladeProb[i] = probCladeOnly + log(probPrior);
+        //double probClade = CalcProbMutClade(site, listClades[i] );
+        // YW: need to check this
+        //loglikeTot = GetLogSumOfTwo(loglikeTot, log(probPrior) + probCladeOnly);
+        if (loglikeTot < listCladeProb[i])
+        {
+            loglikeTot = listCladeProb[i];
+        }
     }
-  }
-  // return loglikeTot + sumProb0;
-  double res = loglikeTot + sumProb0;
-  // cout << "log prob at site: " << res << endl;
-  return res;
+    //return loglikeTot + sumProb0;
+    double res = loglikeTot + sumProb0;
+    //cout << "log prob at site: " << res << endl;
+    return res;
 }
 
-double ScistPerfPhyProbOnTree ::CalcProbForSiteGeno(
-    int site, double totEdgeLen, const vector<set<int> > &listClades) const {
-  ScistPerfPhyProbOnTree *pthis = const_cast<ScistPerfPhyProbOnTree *>(this);
-  ScistPerfPhyProbOnTree spppt(pthis->genosInputHap, this->mtree);
-  return spppt.CalcProbForSite(2 * site, totEdgeLen, listClades) +
-         spppt.CalcProbForSite(2 * site + 1, totEdgeLen, listClades);
+double ScistPerfPhyProbOnTree ::CalcProbForSiteGeno(int site, double totEdgeLen, const vector<set<int>> &listClades) const
+{
+    ScistPerfPhyProbOnTree *pthis = const_cast<ScistPerfPhyProbOnTree *>(this);
+    ScistPerfPhyProbOnTree spppt(pthis->genosInputHap, this->mtree);
+    return spppt.CalcProbForSite(2 * site, totEdgeLen, listClades) + spppt.CalcProbForSite(2 * site + 1, totEdgeLen, listClades);
 }
diff --git a/trisicell/external/scistree/ScistPerfPhyImp.hpp b/trisicell/external/scistree/ScistPerfPhyImp.hpp
index dded003..605c751 100644
--- a/trisicell/external/scistree/ScistPerfPhyImp.hpp
+++ b/trisicell/external/scistree/ScistPerfPhyImp.hpp
@@ -9,13 +9,15 @@
 #ifndef ScistPerfPhyImp_hpp
 #define ScistPerfPhyImp_hpp
 
-#include "ScistGenotype.hpp"
+#include <set>
+#include <vector>
+#include <map>
+#include <tuple>
 #include "ScistPerfPhyUtils.hpp"
+#include "ScistGenotype.hpp"
 #include "TreeBuilder.h"
 #include "UtilsNumerical.h"
-#include <map>
-#include <set>
-#include <vector>
+#include "ctpl_stl.h"
 
 class PhyloDistance;
 class MarginalTree;
@@ -23,148 +25,116 @@ class MarginalTree;
 // *************************************************************************************
 // Utiltiies
 
-void OutputMutationTree(const char *filenameMT, const string &strMutTree,
-                        bool fLabel);
+void OutputMutationTree(const char *filenameMT, const string &strMutTree, bool fLabel);
 
 // *************************************************************************************
 // Build phylogeny by tree search
 
 class ScistPerfPhyProbOnTree;
 
-class ScistPerfPhyMLE {
+class ScistPerfPhyMLE
+{
 public:
-  ScistPerfPhyMLE(ScistGenGenotypeMat &genos);
-  double Infer(
-      std::set<std::pair<std::pair<int, int>, int> > *plistChangedPlaces = NULL,
-      std::string *pstrTreeNW = NULL);
-  void SetVerbose(bool f) { fVerbose = f; }
-  void SetBrOpt(bool f) { fOptBrLen = f; }
-  void SetOutput(bool f) { fOutput = f; }
-  void SetPPOut(bool f) { fOutputPPWithEdgeLabels = f; }
-  void SetPPOutLabel(bool f) { fOutputLabel = f; }
-  void SetSPR(bool f) { fSPR = f; }
-  void SetSPRNum(int n) { maxSPRNum = n; }
-  void SetCellNames(const std::vector<std::string> &listCellNamesIn) {
-    listCellNames = listCellNamesIn;
-  }
-  void SetSiteNames(const std::vector<std::string> &listSiteNamesIn) {
-    listSiteNames = listSiteNamesIn;
-  }
-  void SetMutTreeFileName(const std::string &strMutTreeFileNameIn) {
-    this->strMutTreeFileName = strMutTreeFileNameIn;
-  }
-  static void GetNgbrTreesFrom(int numHaps, const std::string &strTree,
-                               std::set<std::string> &setNgbrTrees);
-  static void GetNgbrTreesFromSPR(int numHaps, const std::string &strTree,
-                                  std::set<std::string> &setNgbrTrees);
-  static std::string RemapLeafLbls(int numHaps, const std::string &strTree,
-                                   const std::map<int, int> &mapLabels);
+    ScistPerfPhyMLE(ScistGenGenotypeMat &genos);
+    double Infer(std::set<std::pair<std::pair<int, int>, int>> *plistChangedPlaces = NULL, std::string *pstrTreeNW = NULL);
+    void SetVerbose(bool f) { fVerbose = f; }
+    void SetBrOpt(bool f) { fOptBrLen = f; }
+    void SetOutput(bool f) { fOutput = f; }
+    void SetPPOut(bool f) { fOutputPPWithEdgeLabels = f; }
+    void SetPPOutLabel(bool f) { fOutputLabel = f; }
+    void SetSPR(bool f) { fSPR = f; }
+    void SetSPRNum(int n) { maxSPRNum = n; }
+    void SetCellNames(const std::vector<std::string> &listCellNamesIn) { listCellNames = listCellNamesIn; }
+    void SetSiteNames(const std::vector<std::string> &listSiteNamesIn) { listSiteNames = listSiteNamesIn; }
+    void SetMutTreeFileName(const std::string &strMutTreeFileNameIn) { this->strMutTreeFileName = strMutTreeFileNameIn; }
+    void SetNumThreads(int n) { numThreads = n; }
+    static void GetNgbrTreesFrom(int numHaps, const std::string &strTree, std::set<std::string> &setNgbrTrees);
+    static void GetNgbrTreesFromSPR(int numHaps, const std::string &strTree, std::set<std::string> &setNgbrTrees);
+    static std::string RemapLeafLbls(int numHaps, const std::string &strTree, const std::map<int, int> &mapLabels);
 
 private:
-  void Init();
-  std::string ConsTreeFromSetClusters(
-      const std::set<ScistPerfPhyCluster> &setClusters) const;
-  void FindChangedGenos(
-      int site,
-      const std::pair<ScistPerfPhyCluster, ScistPerfPhyCluster> &clusToAdd,
-      std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces) const;
-  static std::string RemapLeafLbls(int numHaps,
-                                   const std::string &strTree0Based,
-                                   const vector<int> &listLblsOld);
-  double
-  ScoreTree(const string &strTree,
-            std::vector<std::pair<ScistPerfPhyCluster, ScistPerfPhyCluster> >
-                &listChangedCluster) const;
-  double ScoreTreeWithSite(ScistPerfPhyProbOnTree &probTree, MarginalTree &tree,
-                           int site, ScistPerfPhyCluster &clusChanged1,
-                           ScistPerfPhyCluster &clusChanged2) const;
-  double CalcMaxProbUpperBound() const;
-  double OptBranchLens(const std::string &strTree, std::string &strTreeBrOpt);
-  double CalcChangedGenosProb(
-      const std::set<std::pair<std::pair<int, int>, int> > &listChangedPlaces)
-      const;
-  std::string ConvCellTreeStr(const std::string &strTree) const;
-  std::string ConvMutTreeStr(const std::string &strTree) const;
-
-  ScistGenGenotypeMat &genosInput;
-  std::vector<ScistPerfPhyCluster> listClusMutsInputHetero;
-  std::vector<ScistPerfPhyCluster> listClusMutsInputHomo;
-  std::vector<int> listInputColMulti;
-  ScistPerfPhyGuideTree treeGuide;
-  bool fVerbose;
-  bool fOptBrLen;
-  bool fOutput;
-  bool fOutputPPWithEdgeLabels;
-  bool fOutputLabel;
-  bool fSPR;
-  int maxSPRNum;
-  std::vector<double> listSitePriorScore;
-  std::vector<std::string> listCellNames;
-  std::vector<std::string> listSiteNames;
-  std::string strMutTreeFileName;
+    void Init();
+    std::string ConsTreeFromSetClusters(const std::set<ScistPerfPhyCluster> &setClusters) const;
+    void FindChangedGenos(int site, const std::pair<ScistPerfPhyCluster, ScistPerfPhyCluster> &clusToAdd, std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces) const;
+    static std::string RemapLeafLbls(int numHaps, const std::string &strTree0Based, const vector<int> &listLblsOld);
+    double ScoreTree(const string &strTree, std::vector<std::pair<ScistPerfPhyCluster, ScistPerfPhyCluster>> &listChangedCluster) const;
+    double ScoreTreeWithSite(ScistPerfPhyProbOnTree &probTree, MarginalTree &tree, int site, ScistPerfPhyCluster &clusChanged1, ScistPerfPhyCluster &clusChanged2) const;
+    double CalcMaxProbUpperBound() const;
+    double OptBranchLens(const std::string &strTree, std::string &strTreeBrOpt);
+    double CalcChangedGenosProb(const std::set<std::pair<std::pair<int, int>, int>> &listChangedPlaces) const;
+    std::string ConvCellTreeStr(const std::string &strTree) const;
+    std::string ConvMutTreeStr(const std::string &strTree) const;
+
+    ScistGenGenotypeMat &genosInput;
+    std::vector<ScistPerfPhyCluster> listClusMutsInputHetero;
+    std::vector<ScistPerfPhyCluster> listClusMutsInputHomo;
+    std::vector<int> listInputColMulti;
+    ScistPerfPhyGuideTree treeGuide;
+    bool fVerbose;
+    bool fOptBrLen;
+    bool fOutput;
+    bool fOutputPPWithEdgeLabels;
+    bool fOutputLabel;
+    bool fSPR;
+    int maxSPRNum;
+    std::vector<double> listSitePriorScore;
+    std::vector<std::string> listCellNames;
+    std::vector<std::string> listSiteNames;
+    std::string strMutTreeFileName;
+    int numThreads;
 };
 
 // *************************************************************************************
 // Build phylogeny by tree search with branch length
 
-class ScistFullPerfPhyMLE : public NumericalAlgoUtils {
+class ScistFullPerfPhyMLE : public NumericalAlgoUtils
+{
 public:
-  ScistFullPerfPhyMLE(ScistGenGenotypeMat &genos);
-  void Infer();
-  void SetVerbose(bool f) { fVerbose = f; }
-  virtual double EvaluateAt(double pt, void *pParam);
-  double OptBranchLens(MarginalTree &tree);
+    ScistFullPerfPhyMLE(ScistGenGenotypeMat &genos);
+    void Infer();
+    void SetVerbose(bool f) { fVerbose = f; }
+    virtual double EvaluateAt(double pt, void *pParam);
+    double OptBranchLens(MarginalTree &tree);
 
 private:
-  void Init();
-  double CalcLikelihoodOf(MarginalTree &tree) const;
-  double CalcLikelihoodOf(ScistPerfPhyProbOnTree &sppp, int site,
-                          MarginalTree &tree, double totEdgeLen,
-                          const std::vector<std::set<int> > &listClades) const;
-  std::string ConsTreeFromSetClusters(
-      const std::set<ScistPerfPhyCluster> &setClusters) const;
-
-  ScistGenGenotypeMat &genosInput;
-  // std::vector<ScistPerfPhyCluster> listClusMutsInput;
-  std::vector<ScistPerfPhyCluster> listClusMutsInputHetero;
-  std::vector<ScistPerfPhyCluster> listClusMutsInputHomo;
-  std::vector<int> listInputColMulti;
-  ScistPerfPhyGuideTree treeGuide;
-  bool fVerbose;
-  std::vector<std::map<std::set<int>, double> > cacheProbMutClades;
-  MarginalTree *pMargTreeOptBrLen;
-  int brOptIndex;
+    void Init();
+    double CalcLikelihoodOf(MarginalTree &tree) const;
+    double CalcLikelihoodOf(ScistPerfPhyProbOnTree &sppp, int site, MarginalTree &tree, double totEdgeLen, const std::vector<std::set<int>> &listClades) const;
+    std::string ConsTreeFromSetClusters(const std::set<ScistPerfPhyCluster> &setClusters) const;
+
+    ScistGenGenotypeMat &genosInput;
+    //std::vector<ScistPerfPhyCluster> listClusMutsInput;
+    std::vector<ScistPerfPhyCluster> listClusMutsInputHetero;
+    std::vector<ScistPerfPhyCluster> listClusMutsInputHomo;
+    std::vector<int> listInputColMulti;
+    ScistPerfPhyGuideTree treeGuide;
+    bool fVerbose;
+    std::vector<std::map<std::set<int>, double>> cacheProbMutClades;
+    MarginalTree *pMargTreeOptBrLen;
+    int brOptIndex;
 };
 
 // *************************************************************************************
 // Tree probability
 
-class ScistPerfPhyProbOnTree {
+class ScistPerfPhyProbOnTree
+{
 public:
-  ScistPerfPhyProbOnTree(ScistGenGenotypeMat &genos, MarginalTree &mtreeIn);
-  double CalcProbMaxForSite(int site, ScistPerfPhyCluster &clusChangedMut,
-                            ScistPerfPhyCluster &clusChangedHomoMut) const;
-  double CalcProbForSite(int site, double totEdgeLen,
-                         const std::vector<std::set<int> > &listClades) const;
+    ScistPerfPhyProbOnTree(ScistGenGenotypeMat &genos, MarginalTree &mtreeIn);
+    double CalcProbMaxForSite(int site, ScistPerfPhyCluster &clusChangedMut, ScistPerfPhyCluster &clusChangedHomoMut) const;
+    double CalcProbForSite(int site, double totEdgeLen, const std::vector<std::set<int>> &listClades) const;
 
 private:
-  void Init();
-  double CalcProbMaxForSiteHap(int site,
-                               ScistPerfPhyCluster &clusChanged) const;
-  double CalcProbMaxForSiteGeno(int site,
-                                ScistPerfPhyCluster &clusChangedHetero,
-                                ScistPerfPhyCluster &clusChangedHomo) const;
-  double
-  CalcProbForSiteHap(int site, double totEdgeLen,
-                     const std::vector<std::set<int> > &listClades) const;
-  double
-  CalcProbForSiteGeno(int site, double totEdgeLen,
-                      const std::vector<std::set<int> > &listClades) const;
-
-  ScistGenGenotypeMat &genosInput;
-  ScistHaplotypeMat genosInputHap;
-  MarginalTree &mtree;
-  std::vector<double> listSitePriorScore;
+    void Init();
+    double CalcProbMaxForSiteHap(int site, ScistPerfPhyCluster &clusChanged) const;
+    double CalcProbMaxForSiteGeno(int site, ScistPerfPhyCluster &clusChangedHetero, ScistPerfPhyCluster &clusChangedHomo) const;
+    double CalcProbForSiteHap(int site, double totEdgeLen, const std::vector<std::set<int>> &listClades) const;
+    double CalcProbForSiteGeno(int site, double totEdgeLen, const std::vector<std::set<int>> &listClades) const;
+
+    ScistGenGenotypeMat &genosInput;
+    ScistHaplotypeMat genosInputHap;
+    MarginalTree &mtree;
+    std::vector<double> listSitePriorScore;
 };
 
 #endif /* ScistPerfPhyImp_hpp */
diff --git a/trisicell/external/scistree/ScistPerfPhyUtils.cpp b/trisicell/external/scistree/ScistPerfPhyUtils.cpp
index c4ad551..c8aac6c 100644
--- a/trisicell/external/scistree/ScistPerfPhyUtils.cpp
+++ b/trisicell/external/scistree/ScistPerfPhyUtils.cpp
@@ -7,536 +7,503 @@
 //
 
 #include "ScistPerfPhyUtils.hpp"
-#include "PhylogenyTree.h"
 #include "ScistGenotype.hpp"
-#include "TreeBuilder.h"
 #include "Utils3.h"
+#include "PhylogenyTree.h"
 #include "Utils4.h"
-#include "UtilsNumerical.h"
+#include "TreeBuilder.h"
 #include <iomanip>
+#include "UtilsNumerical.h"
 
 // *************************************************************************************
 // Cluster
 
-void ScistPerfPhyClusterItor ::First() { it = clus.setMutSCs.begin(); }
-void ScistPerfPhyClusterItor ::Next() { ++it; }
-bool ScistPerfPhyClusterItor ::IsDone() { return it == clus.setMutSCs.end(); }
-int ScistPerfPhyClusterItor ::GetCurrentSC() const { return *it; }
+void ScistPerfPhyClusterItor ::First()
+{
+    it = clus.setMutSCs.begin();
+}
+void ScistPerfPhyClusterItor ::Next()
+{
+    ++it;
+}
+bool ScistPerfPhyClusterItor ::IsDone()
+{
+    return it == clus.setMutSCs.end();
+}
+int ScistPerfPhyClusterItor ::GetCurrentSC() const
+{
+    return *it;
+}
 
 // *************************************************************************************
 // Cluster
 
-ScistPerfPhyCluster ::ScistPerfPhyCluster() {}
+ScistPerfPhyCluster ::ScistPerfPhyCluster()
+{
+}
 
-ScistPerfPhyCluster ::ScistPerfPhyCluster(const std::set<int> &clus)
-    : setMutSCs(clus) {}
+ScistPerfPhyCluster ::ScistPerfPhyCluster(const std::set<int> &clus) : setMutSCs(clus)
+{
+}
 
-ScistPerfPhyCluster ::ScistPerfPhyCluster(const ScistPerfPhyCluster &rhs)
-    : setMutSCs(rhs.setMutSCs) {}
+ScistPerfPhyCluster ::ScistPerfPhyCluster(const ScistPerfPhyCluster &rhs) : setMutSCs(rhs.setMutSCs)
+{
+}
 
-ScistPerfPhyCluster &
-ScistPerfPhyCluster ::operator=(const ScistPerfPhyCluster &rhs) {
-  setMutSCs = rhs.setMutSCs;
-  return *this;
+ScistPerfPhyCluster &ScistPerfPhyCluster ::operator=(const ScistPerfPhyCluster &rhs)
+{
+    setMutSCs = rhs.setMutSCs;
+    return *this;
 }
 
-bool ScistPerfPhyCluster ::operator<(const ScistPerfPhyCluster &rhs) const {
-  return this->setMutSCs < rhs.setMutSCs;
+bool ScistPerfPhyCluster ::operator<(const ScistPerfPhyCluster &rhs) const
+{
+    return this->setMutSCs < rhs.setMutSCs;
 }
 
-void ScistPerfPhyCluster ::IntersectWith(
-    const ScistPerfPhyCluster &rhs, ScistPerfPhyCluster &clusInt,
-    ScistPerfPhyCluster &clusThisOnly, ScistPerfPhyCluster &clusRHSOnly) const {
-  //
-  JoinSets(this->setMutSCs, rhs.setMutSCs, clusInt.setMutSCs);
-  clusThisOnly = *this;
-  clusThisOnly.SubtractFrom(rhs);
-  clusRHSOnly = rhs;
-  clusRHSOnly.SubtractFrom(*this);
+void ScistPerfPhyCluster ::IntersectWith(const ScistPerfPhyCluster &rhs, ScistPerfPhyCluster &clusInt, ScistPerfPhyCluster &clusThisOnly, ScistPerfPhyCluster &clusRHSOnly) const
+{
+    //
+    JoinSets(this->setMutSCs, rhs.setMutSCs, clusInt.setMutSCs);
+    clusThisOnly = *this;
+    clusThisOnly.SubtractFrom(rhs);
+    clusRHSOnly = rhs;
+    clusRHSOnly.SubtractFrom(*this);
 }
 
-void ScistPerfPhyCluster ::SubtractFrom(const ScistPerfPhyCluster &rhs) {
-  //
-  SubtractSets(this->setMutSCs, rhs.setMutSCs);
+void ScistPerfPhyCluster ::SubtractFrom(const ScistPerfPhyCluster &rhs)
+{
+    //
+    SubtractSets(this->setMutSCs, rhs.setMutSCs);
 }
 
-void ScistPerfPhyCluster ::UnionWith(const ScistPerfPhyCluster &rhs) {
-  UnionSets(this->setMutSCs, rhs.setMutSCs);
+void ScistPerfPhyCluster ::UnionWith(const ScistPerfPhyCluster &rhs)
+{
+    UnionSets(this->setMutSCs, rhs.setMutSCs);
 }
 
-void ScistPerfPhyCluster ::GetGenoBinVec(int numHaps,
-                                         vector<int> &vecGeno) const {
-  vecGeno.clear();
-  for (int i = 0; i < numHaps; ++i) {
-    int g = 0;
-    if (this->setMutSCs.find(i) != this->setMutSCs.end()) {
-      g = 1;
+void ScistPerfPhyCluster ::GetGenoBinVec(int numHaps, vector<int> &vecGeno) const
+{
+    vecGeno.clear();
+    for (int i = 0; i < numHaps; ++i)
+    {
+        int g = 0;
+        if (this->setMutSCs.find(i) != this->setMutSCs.end())
+        {
+            g = 1;
+        }
+        vecGeno.push_back(g);
     }
-    vecGeno.push_back(g);
-  }
 }
 
-bool ScistPerfPhyCluster ::IsCompatibleWith(
-    const ScistPerfPhyCluster &rhs) const {
-  // YW: assume rooted compatibility (i.e. three gamates test)
-  ScistPerfPhyCluster clusInt, clusThisOnly, clusRHSOnly;
-  IntersectWith(rhs, clusInt, clusThisOnly, clusRHSOnly);
-  return clusInt.GetSize() == 0 || clusThisOnly.GetSize() == 0 ||
-         clusRHSOnly.GetSize() == 0;
+bool ScistPerfPhyCluster ::IsCompatibleWith(const ScistPerfPhyCluster &rhs) const
+{
+    // YW: assume rooted compatibility (i.e. three gametes test)
+    ScistPerfPhyCluster clusInt, clusThisOnly, clusRHSOnly;
+    IntersectWith(rhs, clusInt, clusThisOnly, clusRHSOnly);
+    return clusInt.GetSize() == 0 || clusThisOnly.GetSize() == 0 || clusRHSOnly.GetSize() == 0;
 }
 
-bool ScistPerfPhyCluster ::IsCompatibleWith(
-    const std::set<ScistPerfPhyCluster> &setClus) const {
-  for (set<ScistPerfPhyCluster>::const_iterator it = setClus.begin();
-       it != setClus.end(); ++it) {
-    if (IsCompatibleWith(*it) == false) {
-      return false;
+bool ScistPerfPhyCluster ::IsCompatibleWith(const std::set<ScistPerfPhyCluster> &setClus) const
+{
+    for (set<ScistPerfPhyCluster>::const_iterator it = setClus.begin(); it != setClus.end(); ++it)
+    {
+        if (IsCompatibleWith(*it) == false)
+        {
+            return false;
+        }
     }
-  }
-  return true;
+    return true;
 }
 
-void ScistPerfPhyCluster ::GetSplitPartsWith(
-    const ScistPerfPhyCluster &rhs,
-    std::vector<std::set<int> > &listParts) const {
-  // get 10, 01, and 11
-  ScistPerfPhyCluster clusInt, clusThisOnly, clusRHSOnly;
-  IntersectWith(rhs, clusInt, clusThisOnly, clusRHSOnly);
-  //
-  listParts.push_back(clusThisOnly.setMutSCs);
-  listParts.push_back(clusRHSOnly.setMutSCs);
-  listParts.push_back(clusInt.setMutSCs);
+void ScistPerfPhyCluster ::GetSplitPartsWith(const ScistPerfPhyCluster &rhs, std::vector<std::set<int>> &listParts) const
+{
+    // get 10, 01, and 11
+    ScistPerfPhyCluster clusInt, clusThisOnly, clusRHSOnly;
+    IntersectWith(rhs, clusInt, clusThisOnly, clusRHSOnly);
+    //
+    listParts.push_back(clusThisOnly.setMutSCs);
+    listParts.push_back(clusRHSOnly.setMutSCs);
+    listParts.push_back(clusInt.setMutSCs);
 }
 
-void ScistPerfPhyCluster ::FlipAlleleAt(int r) {
-  // if row r is in the cluster, remove it; otherwise add it
-  if (setMutSCs.find(r) != setMutSCs.end()) {
-    setMutSCs.erase(r);
-  } else {
-    setMutSCs.insert(r);
-  }
+void ScistPerfPhyCluster ::FlipAlleleAt(int r)
+{
+    // if row r is in the cluster, remove it; otherwise add it
+    if (setMutSCs.find(r) != setMutSCs.end())
+    {
+        setMutSCs.erase(r);
+    }
+    else
+    {
+        setMutSCs.insert(r);
+    }
 }
 
-int ScistPerfPhyCluster ::GetAlleleAt(int r) const {
-  if (setMutSCs.find(r) != setMutSCs.end()) {
-    return 1;
-  } else {
-    return 0;
-  }
+int ScistPerfPhyCluster ::GetAlleleAt(int r) const
+{
+    if (setMutSCs.find(r) != setMutSCs.end())
+    {
+        return 1;
+    }
+    else
+    {
+        return 0;
+    }
 }
 
-void ScistPerfPhyCluster ::Dump() const { DumpIntSet(setMutSCs); }
+void ScistPerfPhyCluster ::Dump() const
+{
+    DumpIntSet(setMutSCs);
+}
 
 // *************************************************************************************
 // Cluster partial order tree node
 
-ScistPerfPhyClusTreeNode ::~ScistPerfPhyClusTreeNode() {}
+ScistPerfPhyClusTreeNode ::~ScistPerfPhyClusTreeNode()
+{
+}
+
+ScistPerfPhyClusTreeNode *ScistPerfPhyClusTreeNode ::ConsClusterTree(const std::map<int, ScistPerfPhyCluster> &setSeedSites, bool fNoDup)
+{
+    // the root has no clus attached (i.e. contains everything)
+    ScistPerfPhyClusTreeNode *pTreeRoot = new ScistPerfPhyClusTreeNode(NULL);
+    set<ScistPerfPhyCluster> setClusDone;
+
+    for (map<int, ScistPerfPhyCluster>::const_iterator it = setSeedSites.begin(); it != setSeedSites.end(); ++it)
+    {
+        if (fNoDup)
+        {
+            if (setClusDone.find(it->second) != setClusDone.end())
+            {
+                continue;
+            }
+        }
+
+        //cout << "Init cluster tree: node: ";
+        //it->second.Dump();
+        ScistPerfPhyClusTreeNode *pNode = new ScistPerfPhyClusTreeNode(&(it->second));
+        pTreeRoot->InsertNode(pNode);
 
-ScistPerfPhyClusTreeNode *ScistPerfPhyClusTreeNode ::ConsClusterTree(
-    const std::map<int, ScistPerfPhyCluster> &setSeedSites, bool fNoDup) {
-  // the root has no clus attached (i.e. contains everything)
-  ScistPerfPhyClusTreeNode *pTreeRoot = new ScistPerfPhyClusTreeNode(NULL);
-  set<ScistPerfPhyCluster> setClusDone;
+        setClusDone.insert(it->second);
+    }
+    return pTreeRoot;
+}
+ScistPerfPhyClusTreeNode *ScistPerfPhyClusTreeNode ::ConsClusterTree(const std::set<ScistPerfPhyCluster> &setSeedSites)
+{
+    // the root has no clus attached (i.e. contains everything)
+    ScistPerfPhyClusTreeNode *pTreeRoot = new ScistPerfPhyClusTreeNode(NULL);
 
-  for (map<int, ScistPerfPhyCluster>::const_iterator it = setSeedSites.begin();
-       it != setSeedSites.end(); ++it) {
-    if (fNoDup) {
-      if (setClusDone.find(it->second) != setClusDone.end()) {
-        continue;
-      }
+    for (set<ScistPerfPhyCluster>::const_iterator it = setSeedSites.begin(); it != setSeedSites.end(); ++it)
+    {
+        //cout << "Init cluster tree: node: ";
+        //it->second.Dump();
+        ScistPerfPhyClusTreeNode *pNode = new ScistPerfPhyClusTreeNode(&(*it));
+        pTreeRoot->InsertNode(pNode);
     }
+    return pTreeRoot;
+}
 
-    // cout << "Init cluster tree: node: ";
-    // it->second.Dump();
-    ScistPerfPhyClusTreeNode *pNode =
-        new ScistPerfPhyClusTreeNode(&(it->second));
-    pTreeRoot->InsertNode(pNode);
-
-    setClusDone.insert(it->second);
-  }
-  return pTreeRoot;
-}
-ScistPerfPhyClusTreeNode *ScistPerfPhyClusTreeNode ::ConsClusterTree(
-    const std::set<ScistPerfPhyCluster> &setSeedSites) {
-  // the root has no clus attached (i.e. contains everything)
-  ScistPerfPhyClusTreeNode *pTreeRoot = new ScistPerfPhyClusTreeNode(NULL);
-
-  for (set<ScistPerfPhyCluster>::const_iterator it = setSeedSites.begin();
-       it != setSeedSites.end(); ++it) {
-    // cout << "Init cluster tree: node: ";
-    // it->second.Dump();
-    ScistPerfPhyClusTreeNode *pNode = new ScistPerfPhyClusTreeNode(&(*it));
-    pTreeRoot->InsertNode(pNode);
-  }
-  return pTreeRoot;
-}
-
-void ScistPerfPhyClusTreeNode ::AddChild(ScistPerfPhyClusTreeNode *pChild) {
-  //
-  listChildren.push_back(pChild);
-  pChild->SetParent(this);
-}
-
-void ScistPerfPhyClusTreeNode ::RemoveChild(ScistPerfPhyClusTreeNode *pChild) {
-  //
-  pChild->SetParent(NULL);
-  listChildren.erase(
-      std::remove(listChildren.begin(), listChildren.end(), pChild),
-      listChildren.end());
-}
-
-void ScistPerfPhyClusTreeNode ::InsertNode(ScistPerfPhyClusTreeNode *pNode) {
-  // cout << "Insert node: ";
-  // pNode->Dump();
-  // cout << " under parent node: ";
-  // Dump();
-  // insert this node below it; may need to split children if there are multiple
-  // ones we assume there is no incompatibility occuring here
-  vector<ScistPerfPhyClusTreeNode *> listChildrenContained;
-  for (int i = 0; i < GetNumChildren(); ++i) {
+void ScistPerfPhyClusTreeNode ::AddChild(ScistPerfPhyClusTreeNode *pChild)
+{
     //
-    ScistPerfPhyCluster clusInt, clusThisOnly, clusRHSOnly;
-    pNode->GetClus()->IntersectWith(*GetChild(i)->GetClus(), clusInt,
-                                    clusThisOnly, clusRHSOnly);
-
-    // test if contained by one subtree; if so, add it to it inteaad
-    bool fContained = (clusThisOnly.GetSize() == 0);
-    if (fContained) {
-      GetChild(i)->InsertNode(pNode);
-      return;
+    listChildren.push_back(pChild);
+    pChild->SetParent(this);
+}
+
+void ScistPerfPhyClusTreeNode ::RemoveChild(ScistPerfPhyClusTreeNode *pChild)
+{
+    //
+    pChild->SetParent(NULL);
+    listChildren.erase(std::remove(listChildren.begin(), listChildren.end(), pChild), listChildren.end());
+}
+
+void ScistPerfPhyClusTreeNode ::InsertNode(ScistPerfPhyClusTreeNode *pNode)
+{
+    //cout << "Insert node: ";
+    //pNode->Dump();
+    //cout << " under parent node: ";
+    //Dump();
+    // insert this node below it; may need to split children if there are multiple ones
+    // we assume there is no incompatibility occurring here
+    vector<ScistPerfPhyClusTreeNode *> listChildrenContained;
+    for (int i = 0; i < GetNumChildren(); ++i)
+    {
+        //
+        ScistPerfPhyCluster clusInt, clusThisOnly, clusRHSOnly;
+        pNode->GetClus()->IntersectWith(*GetChild(i)->GetClus(), clusInt, clusThisOnly, clusRHSOnly);
+
+        // test if contained by one subtree; if so, insert it into that subtree instead
+        bool fContained = (clusThisOnly.GetSize() == 0);
+        if (fContained)
+        {
+            GetChild(i)->InsertNode(pNode);
+            return;
+        }
+        bool fContaining = (clusRHSOnly.GetSize() == 0);
+        bool fDisjoint = (clusInt.GetSize() == 0);
+        if (fContaining)
+        {
+            listChildrenContained.push_back(GetChild(i));
+        }
+        else
+        {
+            YW_ASSERT_INFO(fDisjoint == true, "Wrong: the site is not compatible with the tree");
+        }
     }
-    bool fContaining = (clusRHSOnly.GetSize() == 0);
-    bool fDisjoint = (clusInt.GetSize() == 0);
-    if (fContaining) {
-      listChildrenContained.push_back(GetChild(i));
-    } else {
-      YW_ASSERT_INFO(fDisjoint == true,
-                     "Wrong: the site is not compatible with the tree");
+    //if( listChildrenContained.size() == 0 )
+    //{
+    // just add it below
+    //    AddChild(pNode);
+    //}
+    //else
+    //{
+    //
+    for (int i = 0; i < (int)listChildrenContained.size(); ++i)
+    {
+        RemoveChild(listChildrenContained[i]);
+        pNode->AddChild(listChildrenContained[i]);
+    }
+    AddChild(pNode);
+    //}
+}
+
+void ScistPerfPhyClusTreeNode ::Dump() const
+{
+    cout << "Node: "
+         << ", num of children: " << GetNumChildren() << ": ";
+    if (GetClus() == NULL)
+    {
+        cout << "root. \n";
+    }
+    else
+    {
+        GetClus()->Dump();
     }
-  }
-  // if( listChildrenContained.size() == 0 )
-  //{
-  // just add it below
-  //    AddChild(pNode);
-  //}
-  // else
-  //{
-  //
-  for (int i = 0; i < (int)listChildrenContained.size(); ++i) {
-    RemoveChild(listChildrenContained[i]);
-    pNode->AddChild(listChildrenContained[i]);
-  }
-  AddChild(pNode);
-  //}
-}
-
-void ScistPerfPhyClusTreeNode ::Dump() const {
-  cout << "Node: "
-       << ", num of children: " << GetNumChildren() << ": ";
-  if (GetClus() == NULL) {
-    cout << "root. \n";
-  } else {
-    GetClus()->Dump();
-  }
 }
 
 // *************************************************************************************
 // Guide tree
 
-ScistPerfPhyGuideTree ::ScistPerfPhyGuideTree() {}
-
-void ScistPerfPhyGuideTree ::Init(const std::string &strGuideTree) {
-  this->setGuideTreeClus.clear();
-  // cout << "INIT guide tree...\n";
-  // extract clusters in the tree
-  PhylogenyTreeBasic treeGuide;
-  treeGuide.ConsOnNewick(strGuideTree);
-
-  // get all clusters that don't have zero length (i.e. not non-informative)
-  PhylogenyTreeIterator itorTree(treeGuide);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    if (pn->IsLeaf() == false && pn->IsRoot() == false)
-    // if( pn->GetLength() >= MIN_POS_VAL && pn->IsLeaf() == false &&
-    // pn->IsRoot() == false)
+ScistPerfPhyGuideTree ::ScistPerfPhyGuideTree()
+{
+}
+
+void ScistPerfPhyGuideTree ::Init(const std::string &strGuideTree)
+{
+    this->setGuideTreeClus.clear();
+    //cout << "INIT guide tree...\n";
+    // extract clusters in the tree
+    PhylogenyTreeBasic treeGuide;
+    treeGuide.ConsOnNewick(strGuideTree);
+
+    // get all clusters that don't have zero length (i.e. not non-informative)
+    PhylogenyTreeIterator itorTree(treeGuide);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
     {
-      set<int> ss;
-      pn->GetAllDescendIntLbls(ss);
-      DecAllNumInSet(ss);
-      ScistPerfPhyCluster clus(ss);
-      this->setGuideTreeClus.insert(clus);
-      // cout << "guide tree cluster: ";
-      // clus.Dump();
-    }
+        TreeNode *pn = itorTree.GetCurrNode();
+        if (pn->IsLeaf() == false && pn->IsRoot() == false)
+        //if( pn->GetLength() >= MIN_POS_VAL && pn->IsLeaf() == false && pn->IsRoot() == false)
+        {
+            set<int> ss;
+            pn->GetAllDescendIntLbls(ss);
+            DecAllNumInSet(ss);
+            ScistPerfPhyCluster clus(ss);
+            this->setGuideTreeClus.insert(clus);
+        }
 
-    itorTree.Next();
-  }
-
-  // set<set<int> > setClades;
-  // treeGuide.GetAllClades(setClades);
-  // for(set<set<int> > :: iterator it = setClades.begin(); it !=
-  // setClades.end(); ++it)
-  //{
-  //    set<int> ss=*it;
-  //    DecAllNumInSet(ss);
-  //    ScistPerfPhyCluster clus(ss);
-  //    this->setGuideTreeClus.insert(clus);
-  // cout << "guide tree cluster: ";
-  // clus.Dump();
-  //}
-}
-
-void ScistPerfPhyGuideTree ::InitDecAll(const std::string &strGuideTree1Base) {
-  //
-  this->setGuideTreeClus.clear();
-  // cout << "INIT guide tree...\n";
-  // extract clusters in the tree
-  PhylogenyTreeBasic treeGuide;
-  treeGuide.ConsOnNewick(strGuideTree1Base);
-
-  // dec by one
-  // map<int,int> mapOldToNew;
-  // for(int i=0; i<treeGuide.GetNumLeaves(); ++i)
-  //{
-  //    mapOldToNew[i+1] = i;
-  //}
-  // ChangeLeafIntLabelOfTree(treeGuide, mapOldToNew, true);
-
-  // get all clusters that don't have zero length (i.e. not non-informative)
-  PhylogenyTreeIterator itorTree(treeGuide);
-  itorTree.Init();
-  while (itorTree.IsDone() == false) {
-    TreeNode *pn = itorTree.GetCurrNode();
-    if (pn->IsLeaf() == false && pn->IsRoot() == false) {
-      set<int> ss;
-      pn->GetAllDescendIntLbls(ss);
-      DecAllNumInSet(ss);
-      ScistPerfPhyCluster clus(ss);
-      this->setGuideTreeClus.insert(clus);
-      // cout << "guide tree cluster: ";
-      // clus.Dump();
+        itorTree.Next();
     }
-
-    itorTree.Next();
-  }
 }
 
-double ScistPerfPhyGuideTree ::EvalClus(const ScistPerfPhyCluster &clus) const {
-  // find the best match
-  double res = 0.0;
-  if (setGuideTreeClus.size() == 0) {
-    return res;
-  }
-  for (set<ScistPerfPhyCluster>::const_iterator it = setGuideTreeClus.begin();
-       it != setGuideTreeClus.end(); ++it) {
+void ScistPerfPhyGuideTree ::InitDecAll(const std::string &strGuideTree1Base)
+{
     //
-    int score = EvalClusWith(clus, *it);
-    res += score;
-    // if( res < score )
+    this->setGuideTreeClus.clear();
+    //cout << "INIT guide tree...\n";
+    // extract clusters in the tree
+    PhylogenyTreeBasic treeGuide;
+    treeGuide.ConsOnNewick(strGuideTree1Base);
+
+    // dec by one
+    //map<int,int> mapOldToNew;
+    //for(int i=0; i<treeGuide.GetNumLeaves(); ++i)
     //{
-    //    res = score;
+    //    mapOldToNew[i+1] = i;
     //}
-  }
-  // return res;
-  return res / setGuideTreeClus.size();
+    //ChangeLeafIntLabelOfTree(treeGuide, mapOldToNew, true);
+
+    // get all clusters that don't have zero length (i.e. not non-informative)
+    PhylogenyTreeIterator itorTree(treeGuide);
+    itorTree.Init();
+    while (itorTree.IsDone() == false)
+    {
+        TreeNode *pn = itorTree.GetCurrNode();
+        if (pn->IsLeaf() == false && pn->IsRoot() == false)
+        {
+            set<int> ss;
+            pn->GetAllDescendIntLbls(ss);
+            DecAllNumInSet(ss);
+            ScistPerfPhyCluster clus(ss);
+            this->setGuideTreeClus.insert(clus);
+            //cout << "guide tree cluster: ";
+            //clus.Dump();
+        }
+
+        itorTree.Next();
+    }
 }
 
-int ScistPerfPhyGuideTree ::EvalClusWith(
-    const ScistPerfPhyCluster &clus, const ScistPerfPhyCluster &clusInTree) {
-  // score (dissimlarity): high means the better fit. Score: percentage of
-  // smallest diff; use Jaccard distance that is, size of intersection over size
-  // of union YW: try compat (1.0) and incompat (0.0) and take average
-  int res = 1;
-  if (clus.IsCompatibleWith(clusInTree) == true) {
-    res = 0;
-  }
-  return res;
+double ScistPerfPhyGuideTree ::EvalClus(const ScistPerfPhyCluster &clus) const
+{
+    // average the 0/1 scores from EvalClusWith over all guide tree clusters
+    double res = 0.0;
+    if (setGuideTreeClus.size() == 0)
+    {
+        return res;
+    }
+    for (set<ScistPerfPhyCluster>::const_iterator it = setGuideTreeClus.begin(); it != setGuideTreeClus.end(); ++it)
+    {
+        //
+        int score = EvalClusWith(clus, *it);
+        res += score;
+    }
+    //return res;
+    return res / setGuideTreeClus.size();
+}
 
-  // ScistPerfPhyCluster clusUnion = clus;
-  // clusUnion.UnionWith(clusInTree);
-  // ScistPerfPhyCluster clusInt, clus1, clus2;
-  // clus.IntersectWith(clusInTree, clusInt, clus1, clus2);
-  // return ((double) clusInt.GetSize() )/ clusUnion.GetSize();
+int ScistPerfPhyGuideTree ::EvalClusWith(const ScistPerfPhyCluster &clus, const ScistPerfPhyCluster &clusInTree)
+{
+    // score (dissimilarity): high means the better fit. Score: percentage of smallest diff; use Jaccard distance
+    // that is, size of intersection over size of union
+    // YW: try compat (1.0) and incompat (0.0) and take average
+    int res = 1;
+    if (clus.IsCompatibleWith(clusInTree) == true)
+    {
+        res = 0;
+    }
+    return res;
 }
 
 // *************************************************************************************
 // Inf perfect phylogeny from genotypes
 
-ScistInfPerfPhyUtils ::ScistInfPerfPhyUtils() {}
-
-ScistInfPerfPhyUtils ::~ScistInfPerfPhyUtils() {}
-
-std::string ScistInfPerfPhyUtils ::ConsTreeWCombDistClus(
-    const ScistGenGenotypeMat &genos,
-    const std::map<int, ScistPerfPhyCluster> &setClus,
-    bool fUseGenoName) const {
-  // not only construct a tree, but also consider the distance
-  set<set<int> > setClustersMustHave;
-  for (map<int, ScistPerfPhyCluster>::const_iterator it = setClus.begin();
-       it != setClus.end(); ++it) {
-    set<int> setOnes;
-    it->second.GetClus(setOnes);
-    setClustersMustHave.insert(setOnes);
-  }
-
-#if 0
-    // add all NJ clusters when compatible
-    std::set< ScistPerfPhyCluster > clusAll;
-    this->treeGuide.GetAllClusters( clusAll );
-    //string strGuideTree = genos.ConsNJTree();
-//cout << "Neighbor joining tree from corrected genotypes: " << strGuideTree << endl;
-    //PhylogenyTreeBasic treeGuide;
-    //treeGuide.ConsOnNewick(strGuideTree);
-    //set<set<int> > setClades;
-    //treeGuide.GetAllClades(setClades);
-    //for(set<set<int> > :: iterator it = setClades.begin(); it != setClades.end(); ++it)
-    for(set<ScistPerfPhyCluster> :: iterator it = clusAll.begin(); it != clusAll.end(); ++it)
+ScistInfPerfPhyUtils ::ScistInfPerfPhyUtils()
+{
+}
+
+ScistInfPerfPhyUtils ::~ScistInfPerfPhyUtils()
+{
+}
+
+std::string ScistInfPerfPhyUtils ::ConsTreeWCombDistClus(const ScistGenGenotypeMat &genos, const std::map<int, ScistPerfPhyCluster> &setClus, bool fUseGenoName) const
+{
+    // not only construct a tree, but also consider the distance
+    set<set<int>> setClustersMustHave;
+    for (map<int, ScistPerfPhyCluster>::const_iterator it = setClus.begin(); it != setClus.end(); ++it)
+    {
+        set<int> setOnes;
+        it->second.GetClus(setOnes);
+        setClustersMustHave.insert(setOnes);
+    }
+
+    PhyloDistance phyDist;
+
+    int numberHaps = genos.GetNumHaps();
+
+    for (int r1 = 0; r1 < numberHaps; ++r1)
     {
-        //set<int> ss=*it;
-        set<int> ss;
-        it->GetClus(ss);
-        //DecAllNumInSet(ss);
-        //ScistPerfPhyCluster clus(ss);
-        ScistPerfPhyCluster clus = *it;
-        if( clus.GetSize() <=1 )
+        phyDist.SetDistance(r1, r1, 0.0);
+        for (int r2 = r1 + 1; r2 < numberHaps; ++r2)
         {
-            continue;
+            //set<int> setDiffs;
+            //matClus.GetSequencesDiffSites(r1,r2, setDiffs);
+            //double dist = ((double)setDiffs.size())/genosInput.GetNumSites();
+            double dist = genos.CalcHammingDistBetwHaps(r1, r2);
+            phyDist.SetDistance(r1, r2, dist);
+            phyDist.SetDistance(r2, r1, dist);
         }
-        // make sure compatible with exisitng clusters
-        bool fCompat = true;
-        for( map<int, ScistPerfPhyCluster> :: const_iterator it2 = setClus.begin(); it2 != setClus.end(); ++it2 )
+    }
+
+    // build tree
+    set<set<int>> setClustersForbiddenEmpty;
+    ConstrainedUPGMATreeBuilder treeBuilder(phyDist, setClustersMustHave, setClustersForbiddenEmpty);
+    while (treeBuilder.IsDone() == false)
+    {
+        set<int> st1, st2;
+        double minDist = treeBuilder.GetMinCoalSubtrees(st1, st2);
+        //cout << "Merging subtrees ht " << minDist << ": ";
+        //DumpIntSet(st1);
+        //DumpIntSet(st2);
+        treeBuilder.MergeSubtrees(st1, st2, minDist);
+    }
+    string strTreeRaw = treeBuilder.GetTree();
+    //cout << "Constructed tree with clusters and distance: " << strTreeRaw << endl;
+
+    // convert labels
+    PhylogenyTreeBasic phTree;
+    phTree.ConsOnNewick(strTreeRaw);
+
+    map<string, string> mapIdToLabels;
+    for (int i = 0; i < numberHaps; ++i)
+    {
+        //cout << "i: " << i << ", name: " << this->genosInput.GetGenotypeName(i) << endl;
+        //string str = "(" + std::to_string(i) + ")";
+        string str = std::to_string(i);
+        if (fUseGenoName)
         {
-            if( clus.IsCompatibleWith(it2->second) == false )
-            {
-                fCompat = false;
-                break;
-            }
+            mapIdToLabels[str] = genos.GetGenotypeName(i);
         }
-        if( fCompat)
+        else
         {
-//cout << "ADDING guide tree cluster: ";
-//clus.Dump();
-            setClustersMustHave.insert( ss );
+            mapIdToLabels[str] = std::to_string(i + 1);
         }
-
     }
-#endif
+    phTree.ReassignLeafLabels(mapIdToLabels);
+    // use base-1 label
+    phTree.IncEdgeLabelsBy(1);
+
+    string res;
+    phTree.ConsNewickSorted(res);
+    //phTree.ConsNewick(res, false, 0.0, true);
+
+    return res;
+}
 
-  //
-  PhyloDistance phyDist;
-#if 0
-    BinaryMatrix matClus;
-    matClus.SetSize( genosInput.GetNumHaps(), genosInput.GetNumSites() );
+void ScistInfPerfPhyUtils ::FillClusterFromMat(const ScistGenGenotypeMat &genos, int site, ScistPerfPhyCluster &clus)
+{
     //
-    for( map<int, ScistPerfPhyCluster> :: const_iterator it = setClus.begin(); it != setClus.end(); ++it )
+    for (int r = 0; r < genos.GetNumHaps(); ++r)
     {
-        for(int i=0; i<genosInput.GetNumHaps(); ++i)
+        int v = genos.GetGenotypeAt(r, site);
+        if (v != 0)
         {
-            matClus.SetValAt( i, it->first, 0 );
+            clus.AddMutSC(r);
         }
-        set<int> setOnes;
-        it->second.GetClus(setOnes);
-        for( set<int> :: iterator it2 = setOnes.begin(); it2 != setOnes.end(); ++it2 )
-        {
-            matClus.SetValAt(*it2, it->first, 1);
-        }
-    }
-//cout << "matClus: ";
-//matClus.Dump();
-#endif
-  for (int r1 = 0; r1 < genos.GetNumHaps(); ++r1) {
-    phyDist.SetDistance(r1, r1, 0.0);
-    for (int r2 = r1 + 1; r2 < genos.GetNumHaps(); ++r2) {
-      // set<int> setDiffs;
-      // matClus.GetSequencesDiffSites(r1,r2, setDiffs);
-      // double dist = ((double)setDiffs.size())/genosInput.GetNumSites();
-      double dist = genos.CalcHammingDistBetwHaps(r1, r2);
-      phyDist.SetDistance(r1, r2, dist);
-      phyDist.SetDistance(r2, r1, dist);
-    }
-  }
-
-  // build tree
-  set<set<int> > setClustersForbiddenEmpty;
-  ConstrainedUPGMATreeBuilder treeBuilder(phyDist, setClustersMustHave,
-                                          setClustersForbiddenEmpty);
-  while (treeBuilder.IsDone() == false) {
-    set<int> st1, st2;
-    double minDist = treeBuilder.GetMinCoalSubtrees(st1, st2);
-    // cout << "Merging subtrees ht " << minDist << ": ";
-    // DumpIntSet(st1);
-    // DumpIntSet(st2);
-    treeBuilder.MergeSubtrees(st1, st2, minDist);
-  }
-  string strTreeRaw = treeBuilder.GetTree();
-  // cout << "Constructed tree with clusters and distance: " << strTreeRaw <<
-  // endl;
-
-  // convert labels
-  PhylogenyTreeBasic phTree;
-  phTree.ConsOnNewick(strTreeRaw);
-
-  map<string, string> mapIdToLabels;
-  for (int i = 0; i < genos.GetNumHaps(); ++i) {
-    // cout << "i: " << i << ", name: " << this->genosInput.GetGenotypeName(i)
-    // << endl; string str = "(" + std::to_string(i) + ")";
-    string str = std::to_string(i);
-    if (fUseGenoName) {
-      mapIdToLabels[str] = genos.GetGenotypeName(i);
-    } else {
-      mapIdToLabels[str] = std::to_string(i + 1);
-    }
-  }
-  phTree.ReassignLeafLabels(mapIdToLabels);
-  // use base-1 label
-  phTree.IncEdgeLabelsBy(1);
-
-  string res;
-  phTree.ConsNewickSorted(res);
-  // phTree.ConsNewick(res, false, 0.0, true);
-
-  // output a tree in GML format
-  // if( this->fOutput )
-  //{
-  //    string fileNameOut =  genosInput.GetFileName() + ".tree.gml";
-  //    phTree.OutputGML(fileNameOut.c_str());
-  //}
-
-  return res;
-}
-
-void ScistInfPerfPhyUtils ::FillClusterFromMat(const ScistGenGenotypeMat &genos,
-                                               int site,
-                                               ScistPerfPhyCluster &clus) {
-  //
-  for (int r = 0; r < genos.GetNumHaps(); ++r) {
-    int v = genos.GetGenotypeAt(r, site);
-    if (v != 0) {
-      clus.AddMutSC(r);
     }
-  }
 }
 
 // *************************************************************************************
 
-void ScistInfPerfPhyTest() {
-  ScistHaplotypeMat genoMat;
-  const int numSCs = 4, numSites = 3;
-  genoMat.SetSize(numSCs, numSites);
-  genoMat.SetGenotypeAt(0, 0, 1);
-  genoMat.SetGenotypeAt(0, 1, 0);
-  genoMat.SetGenotypeAt(0, 2, 1);
-  genoMat.SetGenotypeAt(1, 0, 1);
-  genoMat.SetGenotypeAt(1, 1, 1);
-  genoMat.SetGenotypeAt(1, 2, 0);
-  genoMat.SetGenotypeAt(2, 0, 0);
-  genoMat.SetGenotypeAt(2, 1, 1);
-  genoMat.SetGenotypeAt(2, 2, 1);
-  genoMat.SetGenotypeAt(3, 0, 0);
-  genoMat.SetGenotypeAt(3, 1, 1);
-  genoMat.SetGenotypeAt(3, 2, 0);
-
-  // ScistInfPerfPhy ppInf( genoMat );
-  // ppInf.InferGreedy();
+void ScistInfPerfPhyTest()
+{
+    ScistHaplotypeMat genoMat;
+    const int numSCs = 4, numSites = 3;
+    genoMat.SetSize(numSCs, numSites);
+    genoMat.SetGenotypeAt(0, 0, 1);
+    genoMat.SetGenotypeAt(0, 1, 0);
+    genoMat.SetGenotypeAt(0, 2, 1);
+    genoMat.SetGenotypeAt(1, 0, 1);
+    genoMat.SetGenotypeAt(1, 1, 1);
+    genoMat.SetGenotypeAt(1, 2, 0);
+    genoMat.SetGenotypeAt(2, 0, 0);
+    genoMat.SetGenotypeAt(2, 1, 1);
+    genoMat.SetGenotypeAt(2, 2, 1);
+    genoMat.SetGenotypeAt(3, 0, 0);
+    genoMat.SetGenotypeAt(3, 1, 1);
+    genoMat.SetGenotypeAt(3, 2, 0);
+
+    //ScistInfPerfPhy ppInf( genoMat );
+    //ppInf.InferGreedy();
 }
diff --git a/trisicell/external/scistree/ScistPerfPhyUtils.hpp b/trisicell/external/scistree/ScistPerfPhyUtils.hpp
index 3338730..e08ded9 100644
--- a/trisicell/external/scistree/ScistPerfPhyUtils.hpp
+++ b/trisicell/external/scistree/ScistPerfPhyUtils.hpp
@@ -9,9 +9,9 @@
 #ifndef ScistPerfPhyUtils_hpp
 #define ScistPerfPhyUtils_hpp
 
-#include <map>
 #include <set>
 #include <vector>
+#include <map>
 
 class ScistGenGenotypeMat;
 class PhylogenyTree;
@@ -21,123 +21,111 @@ class PhylogenyTree;
 
 class ScistPerfPhyCluster;
 
-class ScistPerfPhyClusterItor {
+class ScistPerfPhyClusterItor
+{
 public:
-  ScistPerfPhyClusterItor(const ScistPerfPhyCluster &clusIn) : clus(clusIn) {
-    First();
-  }
-  void First();
-  void Next();
-  bool IsDone();
-  int GetCurrentSC() const;
+    ScistPerfPhyClusterItor(const ScistPerfPhyCluster &clusIn) : clus(clusIn) { First(); }
+    void First();
+    void Next();
+    bool IsDone();
+    int GetCurrentSC() const;
 
 private:
-  const ScistPerfPhyCluster &clus;
-  std::set<int>::const_iterator it;
+    const ScistPerfPhyCluster &clus;
+    std::set<int>::const_iterator it;
 };
 
-class ScistPerfPhyCluster {
-  friend class ScistPerfPhyClusterItor;
+class ScistPerfPhyCluster
+{
+    friend class ScistPerfPhyClusterItor;
 
 public:
-  ScistPerfPhyCluster();
-  ScistPerfPhyCluster(const std::set<int> &clus);
-  ScistPerfPhyCluster(const ScistPerfPhyCluster &rhs);
-  ScistPerfPhyCluster &operator=(const ScistPerfPhyCluster &rhs);
-
-  bool operator<(const ScistPerfPhyCluster &rhs) const;
-  int GetSize() const { return setMutSCs.size(); }
-  void IntersectWith(const ScistPerfPhyCluster &rhs,
-                     ScistPerfPhyCluster &clusInt,
-                     ScistPerfPhyCluster &clusThisOnly,
-                     ScistPerfPhyCluster &clusRHSOnly) const;
-  void SubtractFrom(const ScistPerfPhyCluster &rhs);
-  void UnionWith(const ScistPerfPhyCluster &rhs);
-  void Clear() { setMutSCs.clear(); }
-  void GetGenoBinVec(int numSCs, std::vector<int> &vecGeno) const;
-  void GetClus(std::set<int> &clus) const { clus = setMutSCs; }
-  bool IsCompatibleWith(const ScistPerfPhyCluster &rhs) const;
-  bool IsCompatibleWith(const std::set<ScistPerfPhyCluster> &setClus) const;
-  void GetSplitPartsWith(const ScistPerfPhyCluster &rhs,
-                         std::vector<std::set<int> > &listParts) const;
-  void AddMutSC(int r) { setMutSCs.insert(r); }
-  void FlipAlleleAt(int r);
-  int GetAlleleAt(int r) const;
-  void Dump() const;
+    ScistPerfPhyCluster();
+    ScistPerfPhyCluster(const std::set<int> &clus);
+    ScistPerfPhyCluster(const ScistPerfPhyCluster &rhs);
+    ScistPerfPhyCluster &operator=(const ScistPerfPhyCluster &rhs);
+
+    bool operator<(const ScistPerfPhyCluster &rhs) const;
+    int GetSize() const { return setMutSCs.size(); }
+    void IntersectWith(const ScistPerfPhyCluster &rhs, ScistPerfPhyCluster &clusInt, ScistPerfPhyCluster &clusThisOnly, ScistPerfPhyCluster &clusRHSOnly) const;
+    void SubtractFrom(const ScistPerfPhyCluster &rhs);
+    void UnionWith(const ScistPerfPhyCluster &rhs);
+    void Clear() { setMutSCs.clear(); }
+    void GetGenoBinVec(int numSCs, std::vector<int> &vecGeno) const;
+    void GetClus(std::set<int> &clus) const { clus = setMutSCs; }
+    bool IsCompatibleWith(const ScistPerfPhyCluster &rhs) const;
+    bool IsCompatibleWith(const std::set<ScistPerfPhyCluster> &setClus) const;
+    void GetSplitPartsWith(const ScistPerfPhyCluster &rhs, std::vector<std::set<int>> &listParts) const;
+    void AddMutSC(int r) { setMutSCs.insert(r); }
+    void FlipAlleleAt(int r);
+    int GetAlleleAt(int r) const;
+    void Dump() const;
 
 private:
-  std::set<int> setMutSCs;
+    std::set<int> setMutSCs;
 };
 
 // *************************************************************************************
 // Cluster partial order tree node
 
-class ScistPerfPhyClusTreeNode {
+class ScistPerfPhyClusTreeNode
+{
 public:
-  ScistPerfPhyClusTreeNode(const ScistPerfPhyCluster *pClusIn)
-      : pClus(pClusIn), pParent(NULL) {}
-  ~ScistPerfPhyClusTreeNode();
-  static ScistPerfPhyClusTreeNode *
-  ConsClusterTree(const std::map<int, ScistPerfPhyCluster> &setSeedSites,
-                  bool fNoDup = false);
-  static ScistPerfPhyClusTreeNode *
-  ConsClusterTree(const std::set<ScistPerfPhyCluster> &setSeedSites);
-  void SetParent(ScistPerfPhyClusTreeNode *pParentIn) { pParent = pParentIn; }
-  ScistPerfPhyClusTreeNode *GetParent() { return pParent; }
-  int GetNumChildren() const { return listChildren.size(); }
-  ScistPerfPhyClusTreeNode *GetChild(int i) const { return listChildren[i]; }
-  const ScistPerfPhyCluster *GetClus() const { return pClus; }
-  void AddChild(ScistPerfPhyClusTreeNode *pChild);
-  void RemoveChild(ScistPerfPhyClusTreeNode *pChild);
-  void InsertNode(ScistPerfPhyClusTreeNode *pNode);
-  bool IsRoot() const { return pParent == NULL; }
-  bool IsLeaf() const { return GetNumChildren() == 0; }
-  void Dump() const;
+    ScistPerfPhyClusTreeNode(const ScistPerfPhyCluster *pClusIn) : pClus(pClusIn), pParent(NULL) {}
+    ~ScistPerfPhyClusTreeNode();
+    static ScistPerfPhyClusTreeNode *ConsClusterTree(const std::map<int, ScistPerfPhyCluster> &setSeedSites, bool fNoDup = false);
+    static ScistPerfPhyClusTreeNode *ConsClusterTree(const std::set<ScistPerfPhyCluster> &setSeedSites);
+    void SetParent(ScistPerfPhyClusTreeNode *pParentIn) { pParent = pParentIn; }
+    ScistPerfPhyClusTreeNode *GetParent() { return pParent; }
+    int GetNumChildren() const { return listChildren.size(); }
+    ScistPerfPhyClusTreeNode *GetChild(int i) const { return listChildren[i]; }
+    const ScistPerfPhyCluster *GetClus() const { return pClus; }
+    void AddChild(ScistPerfPhyClusTreeNode *pChild);
+    void RemoveChild(ScistPerfPhyClusTreeNode *pChild);
+    void InsertNode(ScistPerfPhyClusTreeNode *pNode);
+    bool IsRoot() const { return pParent == NULL; }
+    bool IsLeaf() const { return GetNumChildren() == 0; }
+    void Dump() const;
 
 private:
-  const ScistPerfPhyCluster *pClus;
-  ScistPerfPhyClusTreeNode *pParent;
-  std::vector<ScistPerfPhyClusTreeNode *> listChildren;
+    const ScistPerfPhyCluster *pClus;
+    ScistPerfPhyClusTreeNode *pParent;
+    std::vector<ScistPerfPhyClusTreeNode *> listChildren;
 };
 
 // *************************************************************************************
 // Guide tree
 
-class ScistPerfPhyGuideTree {
+class ScistPerfPhyGuideTree
+{
 public:
-  ScistPerfPhyGuideTree();
-  void Init(const std::string &strGuideTree);
-  void InitDecAll(const std::string &strGuideTree1Base);
-  double EvalClus(const ScistPerfPhyCluster &clus) const;
-  void GetAllClusters(std::set<ScistPerfPhyCluster> &clusAll) const {
-    clusAll = this->setGuideTreeClus;
-  }
+    ScistPerfPhyGuideTree();
+    void Init(const std::string &strGuideTree);
+    void InitDecAll(const std::string &strGuideTree1Base);
+    double EvalClus(const ScistPerfPhyCluster &clus) const;
+    void GetAllClusters(std::set<ScistPerfPhyCluster> &clusAll) const { clusAll = this->setGuideTreeClus; }
 
 private:
-  static int EvalClusWith(const ScistPerfPhyCluster &clus,
-                          const ScistPerfPhyCluster &clusInTree);
+    static int EvalClusWith(const ScistPerfPhyCluster &clus, const ScistPerfPhyCluster &clusInTree);
 
-  std::set<ScistPerfPhyCluster> setGuideTreeClus;
+    std::set<ScistPerfPhyCluster> setGuideTreeClus;
 };
 
 // *************************************************************************************
 // Inf perfect phylogeny from genotypes
 
-class ScistInfPerfPhyUtils {
+class ScistInfPerfPhyUtils
+{
 public:
-  ScistInfPerfPhyUtils();
-  ~ScistInfPerfPhyUtils();
-  static void FillClusterFromMat(const ScistGenGenotypeMat &genos, int site,
-                                 ScistPerfPhyCluster &clus);
-  std::string
-  ConsTreeWCombDistClus(const ScistGenGenotypeMat &genos,
-                        const std::map<int, ScistPerfPhyCluster> &setClus,
-                        bool fUseGenoName = true) const;
+    ScistInfPerfPhyUtils();
+    ~ScistInfPerfPhyUtils();
+    static void FillClusterFromMat(const ScistGenGenotypeMat &genos, int site, ScistPerfPhyCluster &clus);
+    std::string ConsTreeWCombDistClus(const ScistGenGenotypeMat &genos, const std::map<int, ScistPerfPhyCluster> &setClus, bool fUseGenoName = true) const;
 
 private:
-  void ClearClusTree();
+    void ClearClusTree();
 
-  ScistPerfPhyClusTreeNode *pClusTreeRoot;
+    ScistPerfPhyClusTreeNode *pClusTreeRoot;
 };
 
 // *************************************************************************************
diff --git a/trisicell/external/scistree/TreeBuilder.cpp b/trisicell/external/scistree/TreeBuilder.cpp
index fc39e26..3328f0e 100644
--- a/trisicell/external/scistree/TreeBuilder.cpp
+++ b/trisicell/external/scistree/TreeBuilder.cpp
@@ -7,1395 +7,1359 @@
 //
 
 #include "TreeBuilder.h"
-#include "Utils.h"
 #include <string>
+#include "Utils.h"
 
 //***********************************************************************
-void TestNJ() {
-  //
-  PhyloDistance distNJ;
-  distNJ.SetDistance(1, 2, 5.0);
-  distNJ.SetDistance(1, 3, 9.0);
-  distNJ.SetDistance(1, 4, 9.0);
-  distNJ.SetDistance(1, 5, 8.0);
-  distNJ.SetDistance(2, 3, 10.0);
-  distNJ.SetDistance(2, 4, 10.0);
-  distNJ.SetDistance(2, 5, 9.0);
-  distNJ.SetDistance(3, 4, 8.0);
-  distNJ.SetDistance(3, 5, 7.0);
-  distNJ.SetDistance(4, 5, 3.0);
-
-  DistanceTreeBuilder builder(distNJ);
-  string treeNW = builder.NJ();
-  cout << "Constructed NJ tree: " << treeNW << endl;
-  // distNJ.Dump();
+void TestNJ()
+{
+    //
+    PhyloDistance distNJ;
+    distNJ.SetDistance(1, 2, 5.0);
+    distNJ.SetDistance(1, 3, 9.0);
+    distNJ.SetDistance(1, 4, 9.0);
+    distNJ.SetDistance(1, 5, 8.0);
+    distNJ.SetDistance(2, 3, 10.0);
+    distNJ.SetDistance(2, 4, 10.0);
+    distNJ.SetDistance(2, 5, 9.0);
+    distNJ.SetDistance(3, 4, 8.0);
+    distNJ.SetDistance(3, 5, 7.0);
+    distNJ.SetDistance(4, 5, 3.0);
+
+    DistanceTreeBuilder builder(distNJ);
+    string treeNW = builder.NJ();
+    cout << "Constructed NJ tree: " << treeNW << endl;
+    //distNJ.Dump();
 }
 
 //***********************************************************************
 // define distances between taxa
 
-void PhyloDistance ::SetDistance(int node1, int node2, double dist) {
-  //
-  pair<int, int> pp(node1, node2);
-  mapDists.insert(map<pair<int, int>, double>::value_type(pp, dist));
+void PhyloDistance ::SetDistance(int node1, int node2, double dist)
+{
+    //
+    std::pair<int, int> pp(node1, node2);
+    mapDists.insert(std::map<std::pair<int, int>, double>::value_type(pp, dist));
 }
 
-double PhyloDistance ::GetDistance(int node1, int node2) const {
-  //
-  PhyloDistance *pthis = const_cast<PhyloDistance *>(this);
-  pair<int, int> pp1(node1, node2), pp2(node2, node1);
-  if (mapDists.find(pp1) != mapDists.end()) {
+double PhyloDistance ::GetDistance(int node1, int node2) const
+{
     //
-    return pthis->mapDists[pp1];
-  }
-  if (mapDists.find(pp2) != mapDists.end()) {
-    //
-    return pthis->mapDists[pp2];
-  }
-  YW_ASSERT_INFO(false, "Fail to find");
-  return 0.0;
+    PhyloDistance *pthis = const_cast<PhyloDistance *>(this);
+    std::pair<int, int> pp1(node1, node2), pp2(node2, node1);
+    if (mapDists.find(pp1) != mapDists.end())
+    {
+        //
+        return pthis->mapDists[pp1];
+    }
+    if (mapDists.find(pp2) != mapDists.end())
+    {
+        //
+        return pthis->mapDists[pp2];
+    }
+    YW_ASSERT_INFO(false, "Fail to find");
+    return 0.0;
 }
 
-double PhyloDistance ::GetDistanceNonNeg(int node1, int node2) const {
-  //
-  double dist = GetDistance(node1, node2);
-  if (dist < 0.0) {
-    dist = 0.0;
-  }
-  return dist;
+double PhyloDistance ::GetDistanceNonNeg(int node1, int node2) const
+{
+    //
+    double dist = GetDistance(node1, node2);
+    if (dist < 0.0)
+    {
+        dist = 0.0;
+    }
+    return dist;
 }
 
-void PhyloDistance ::GetAllNodes(set<int> &nodesAll) const {
-  // cout << "PhyloDistance :: GetAllNodes: dump: ";
-  // this->Dump();
-  //
-  nodesAll.clear();
-  for (map<pair<int, int>, double>::const_iterator it = mapDists.begin();
-       it != mapDists.end(); ++it) {
-    nodesAll.insert(it->first.first);
-    nodesAll.insert(it->first.second);
-  }
+void PhyloDistance ::GetAllNodes(set<int> &nodesAll) const
+{
+    //cout << "PhyloDistance :: GetAllNodes: dump: ";
+    //this->Dump();
+    //
+    nodesAll.clear();
+    for (map<pair<int, int>, double>::const_iterator it = mapDists.begin(); it != mapDists.end(); ++it)
+    {
+        nodesAll.insert(it->first.first);
+        nodesAll.insert(it->first.second);
+    }
 }
 
-double PhyloDistance ::CalcAveDistBtwClusters(
-    const set<set<int> > &setClusters) const {
-  //
-  double res = 0.0;
-  int numDist = 0;
-
-  for (set<set<int> >::const_iterator it1 = setClusters.begin();
-       it1 != setClusters.end(); ++it1) {
-    set<set<int> >::const_iterator it2 = it1;
-    ++it2;
-    for (; it2 != setClusters.end(); ++it2) {
-      // now sum over all dist
-      for (set<int>::const_iterator it3 = it1->begin(); it3 != it1->end();
-           ++it3) {
-        for (set<int>::const_iterator it4 = it2->begin(); it4 != it2->end();
-             ++it4) {
-          res += GetDistance(*it3, *it4);
-          ++numDist;
+double PhyloDistance ::CalcAveDistBtwClusters(const set<set<int>> &setClusters) const
+{
+    //
+    double res = 0.0;
+    int numDist = 0;
+
+    for (set<set<int>>::const_iterator it1 = setClusters.begin(); it1 != setClusters.end(); ++it1)
+    {
+        set<set<int>>::const_iterator it2 = it1;
+        ++it2;
+        for (; it2 != setClusters.end(); ++it2)
+        {
+            // now sum over all dist
+            for (set<int>::const_iterator it3 = it1->begin(); it3 != it1->end(); ++it3)
+            {
+                for (set<int>::const_iterator it4 = it2->begin(); it4 != it2->end(); ++it4)
+                {
+                    res += GetDistance(*it3, *it4);
+                    ++numDist;
+                }
+            }
         }
-      }
     }
-  }
 
-  return res / numDist;
+    return res / numDist;
 }
 
-void PhyloDistance ::Dump() const {
-  //
-  for (map<pair<int, int>, double>::const_iterator it = mapDists.begin();
-       it != mapDists.end(); ++it) {
-    cout << "[" << it->first.first << "," << it->first.second
-         << "]: " << it->second << endl;
-  }
+void PhyloDistance ::Dump() const
+{
+    //
+    for (map<pair<int, int>, double>::const_iterator it = mapDists.begin(); it != mapDists.end(); ++it)
+    {
+        cout << "[" << it->first.first << "," << it->first.second << "]: " << it->second << endl;
+    }
 }
 
 // distance based tree builder
 
-DistanceTreeBuilder ::DistanceTreeBuilder(PhyloDistance &distPairwiseTaxaIn)
-    : distPairwiseTaxa(distPairwiseTaxaIn), taxonOutgroup(-1) {
-  //
+DistanceTreeBuilder ::DistanceTreeBuilder(PhyloDistance &distPairwiseTaxaIn) : distPairwiseTaxa(distPairwiseTaxaIn), taxonOutgroup(-1)
+{
+    //
 }
 
 // build tree using neighbor joining
-string DistanceTreeBuilder ::NJ() {
-  // get all the things into the search set
-  set<int> nodesToSearch;
-  distPairwiseTaxa.GetAllNodes(nodesToSearch);
-
-  // must have at least two nodes
-  YW_ASSERT_INFO(nodesToSearch.size() >= 2, "Must have two nodes at least");
-
-  // get the next largest one
-  int nodeToUse = 1 + (*nodesToSearch.rbegin());
-
-  // build a Newick string
-  string strNW;
-  map<int, string> mapSubtreeStr;
-  for (set<int>::iterator it = nodesToSearch.begin(); it != nodesToSearch.end();
-       ++it) {
-    //
-    // char buf[100];
-    // sprintf(buf, "%d", *it);
-    // string strName = buf;
-    string strName = GetTaxonNameFor(*it);
-    mapSubtreeStr.insert(map<int, string>::value_type(*it, strName));
-    // cout << "Init node: " << *it << ": string: " << strName << endl;
-  }
-
-  int ngbr1 = -1, ngbr2 = -1;
-  while (nodesToSearch.size() >= 3) {
-    NJFindNgbrs(nodeToUse, nodesToSearch, ngbr1, ngbr2);
-    // cout << "Neighbors found: " << ngbr1 << ", " << ngbr2 << ", and merged
-    // into node: " << nodeToUse << endl;
+string DistanceTreeBuilder ::NJ()
+{
+    // get all the things into the search set
+    set<int> nodesToSearch;
+    distPairwiseTaxa.GetAllNodes(nodesToSearch);
+
+    // must have at least two nodes
+    YW_ASSERT_INFO(nodesToSearch.size() >= 2, "Must have two nodes at least");
+
+    // get the next largest one
+    int nodeToUse = 1 + (*nodesToSearch.rbegin());
+
+    // build a Newick string
+    string strNW;
+    map<int, string> mapSubtreeStr;
+    for (set<int>::iterator it = nodesToSearch.begin(); it != nodesToSearch.end(); ++it)
+    {
+        //
+        //char buf[100];
+        //sprintf(buf, "%d", *it);
+        //string strName = buf;
+        string strName = GetTaxonNameFor(*it);
+        mapSubtreeStr.insert(map<int, string>::value_type(*it, strName));
+        //cout << "Init node: " << *it << ": string: " << strName << endl;
+    }
+
+    int ngbr1 = -1, ngbr2 = -1;
+    while (nodesToSearch.size() >= 3)
+    {
+        NJFindNgbrs(nodeToUse, nodesToSearch, ngbr1, ngbr2);
+        //cout << "Neighbors found: " << ngbr1 << ", " << ngbr2 << ", and merged into node: " << nodeToUse << endl;
+
+        char buf1[100];
+        sprintf(buf1, "%f", distPairwiseTaxa.GetDistanceNonNeg(nodeToUse, ngbr1));
+        string strDist1 = buf1;
+        char buf2[100];
+        sprintf(buf2, "%f", distPairwiseTaxa.GetDistanceNonNeg(nodeToUse, ngbr2));
+        string strDist2 = buf2;
+
+        string strSubtree = "(" + mapSubtreeStr[ngbr1] + ":" + strDist1 + "," + mapSubtreeStr[ngbr2] + ":" + strDist2 + ")";
+        mapSubtreeStr.insert(map<int, string>::value_type(nodeToUse, strSubtree));
+        //cout << "For node: " << nodeToUse << ": string: " << strSubtree << endl;
+        ++nodeToUse;
+    }
+
+    // create a root
+    int rootNode = nodeToUse;
+    int ngbr1Final = *(nodesToSearch.begin());
+    int ngbr2Final = *(nodesToSearch.rbegin());
+    double distRootBranch = distPairwiseTaxa.GetDistanceNonNeg(ngbr1Final, ngbr2Final);
+    double distNew = 0.5 * distRootBranch;
+    distPairwiseTaxa.SetDistance(rootNode, ngbr1Final, distNew);
+    distPairwiseTaxa.SetDistance(rootNode, ngbr2Final, distNew);
 
+    // final subtree
     char buf1[100];
-    sprintf(buf1, "%f", distPairwiseTaxa.GetDistanceNonNeg(nodeToUse, ngbr1));
-    string strDist1 = buf1;
-    char buf2[100];
-    sprintf(buf2, "%f", distPairwiseTaxa.GetDistanceNonNeg(nodeToUse, ngbr2));
-    string strDist2 = buf2;
-
-    string strSubtree = "(" + mapSubtreeStr[ngbr1] + ":" + strDist1 + "," +
-                        mapSubtreeStr[ngbr2] + ":" + strDist2 + ")";
-    mapSubtreeStr.insert(map<int, string>::value_type(nodeToUse, strSubtree));
-    // cout << "For node: " << nodeToUse << ": string: " << strSubtree << endl;
-    ++nodeToUse;
-  }
-
-  // create a root
-  int rootNode = nodeToUse;
-  int ngbr1Final = *(nodesToSearch.begin());
-  int ngbr2Final = *(nodesToSearch.rbegin());
-  double distRootBranch =
-      distPairwiseTaxa.GetDistanceNonNeg(ngbr1Final, ngbr2Final);
-  double distNew = 0.5 * distRootBranch;
-  distPairwiseTaxa.SetDistance(rootNode, ngbr1Final, distNew);
-  distPairwiseTaxa.SetDistance(rootNode, ngbr2Final, distNew);
-
-  // final subtree
-  char buf1[100];
-  sprintf(buf1, "%f", distNew);
-  string strDist = buf1;
-  string strSubtree = "(" + mapSubtreeStr[ngbr1Final] + ":" + strDist + "," +
-                      mapSubtreeStr[ngbr2Final] + ":" + strDist + ")";
-  // cout << "Final neighbor joining tree: " << strSubtree << endl;
-  // now dump out all distances
-  // distPairwiseTaxa.Dump();
-
-  return strSubtree;
+    sprintf(buf1, "%f", distNew);
+    string strDist = buf1;
+    string strSubtree = "(" + mapSubtreeStr[ngbr1Final] + ":" + strDist + "," + mapSubtreeStr[ngbr2Final] + ":" + strDist + ")";
+    //cout << "Final neighbor joining tree: " << strSubtree << endl;
+    // now dump out all distances
+    //distPairwiseTaxa.Dump();
+
+    return strSubtree;
 }
 
-void DistanceTreeBuilder ::NJFindNgbrs(int nodeIdNew, set<int> &nodesToSearch,
-                                       int &ngbr1, int &ngbr2) {
-  // cout << "set of nodes to search: ";
-  // DumpIntSet( nodesToSearch);
-  // find two best ngbrs from the nodes to search (which are
-  // ngbr1 and ngbr2, and remove these two from the nodes to search)
-  // create a new node with the given id, add into nodestosearch, update dists
-
-  // first compute ave distances of all current nodes
-  map<int, double> mapAveDists;
-  for (set<int>::iterator it = nodesToSearch.begin(); it != nodesToSearch.end();
-       ++it) {
-    //
-    double dist = NJCalcAveDist(*it, nodesToSearch);
-    // cout << "single node distance for " << *it << ": " << dist << endl;
-    mapAveDists.insert(map<int, double>::value_type(*it, dist));
-  }
-
-  // search all pair to find the best one to merge
-  double distNJMin = HAP_MAX_INT * 1.0;
-  int node1Min = -1;
-  int node2Min = -1;
-  double dist12Min = 0.0;
-  for (set<int>::iterator it1 = nodesToSearch.begin();
-       it1 != nodesToSearch.end(); ++it1) {
-    int node1cur = *it1;
-    // don't consider outgroup
-    if (node1cur == taxonOutgroup) {
-      continue;
+void DistanceTreeBuilder ::NJFindNgbrs(int nodeIdNew, set<int> &nodesToSearch, int &ngbr1, int &ngbr2)
+{
+    //cout << "set of nodes to search: ";
+    //DumpIntSet( nodesToSearch);
+    // find two best ngbrs from the nodes to search (which are
+    // ngbr1 and ngbr2, and remove these two from the nodes to search)
+    // create a new node with the given id, add into nodestosearch, update dists
+
+    // first compute ave distances of all current nodes
+    map<int, double> mapAveDists;
+    for (set<int>::iterator it = nodesToSearch.begin(); it != nodesToSearch.end(); ++it)
+    {
+        //
+        double dist = NJCalcAveDist(*it, nodesToSearch);
+        //cout << "single node distance for " << *it << ": " << dist << endl;
+        mapAveDists.insert(map<int, double>::value_type(*it, dist));
     }
 
-    double dist1 = mapAveDists[node1cur];
-
-    set<int>::iterator it2 = it1;
-    ++it2;
-
-    for (; it2 != nodesToSearch.end(); ++it2) {
-      int node2cur = *it2;
-      if (node2cur == taxonOutgroup) {
-        continue;
-      }
-      double dist12 = distPairwiseTaxa.GetDistance(node1cur, node2cur);
-      double dist2 = mapAveDists[node2cur];
-      double distNJ = dist12 - dist1 - dist2;
-      // cout << "For nodes: " << node1cur << ", " << node2cur << ", dist1=" <<
-      // dist1 << ", dist2: " << dist2 << ", dist12: " << dist12 << ", distNJ: "
-      // << distNJ << endl;
-      if (distNJ < distNJMin) {
-        distNJMin = distNJ;
-        node1Min = node1cur;
-        node2Min = node2cur;
-        dist12Min = dist12;
-      }
+    // search all pair to find the best one to merge
+    double distNJMin = HAP_MAX_INT * 1.0;
+    int node1Min = -1;
+    int node2Min = -1;
+    double dist12Min = 0.0;
+    for (set<int>::iterator it1 = nodesToSearch.begin(); it1 != nodesToSearch.end(); ++it1)
+    {
+        int node1cur = *it1;
+        // don't consider outgroup
+        if (node1cur == taxonOutgroup)
+        {
+            continue;
+        }
+
+        double dist1 = mapAveDists[node1cur];
+
+        set<int>::iterator it2 = it1;
+        ++it2;
+
+        for (; it2 != nodesToSearch.end(); ++it2)
+        {
+            int node2cur = *it2;
+            if (node2cur == taxonOutgroup)
+            {
+                continue;
+            }
+            double dist12 = distPairwiseTaxa.GetDistance(node1cur, node2cur);
+            double dist2 = mapAveDists[node2cur];
+            double distNJ = dist12 - dist1 - dist2;
+            //cout << "For nodes: " << node1cur << ", " << node2cur << ", dist1=" << dist1 << ", dist2: " << dist2 << ", dist12: " << dist12 << ", distNJ: " << distNJ << endl;
+            if (distNJ < distNJMin)
+            {
+                distNJMin = distNJ;
+                node1Min = node1cur;
+                node2Min = node2cur;
+                dist12Min = dist12;
+            }
+        }
     }
-  }
-
-  // add the new node with right dist
-  YW_ASSERT_INFO(node1Min >= 0 && node2Min >= 0, "Wrong");
-  double dist1toNew =
-      0.5 * dist12Min + 0.5 * (mapAveDists[node1Min] - mapAveDists[node2Min]);
-  double dist2toNew =
-      0.5 * dist12Min + 0.5 * (mapAveDists[node2Min] - mapAveDists[node1Min]);
-  distPairwiseTaxa.SetDistance(nodeIdNew, node1Min, dist1toNew);
-  distPairwiseTaxa.SetDistance(nodeIdNew, node2Min, dist2toNew);
-
-  // calc remaining distances
-  for (set<int>::iterator it = nodesToSearch.begin(); it != nodesToSearch.end();
-       ++it) {
-    int nodecur = *it;
-    if (nodecur == node1Min || nodecur == node2Min) {
-      continue;
+
+    // add the new node with right dist
+    YW_ASSERT_INFO(node1Min >= 0 && node2Min >= 0, "Wrong");
+    double dist1toNew = 0.5 * dist12Min + 0.5 * (mapAveDists[node1Min] - mapAveDists[node2Min]);
+    double dist2toNew = 0.5 * dist12Min + 0.5 * (mapAveDists[node2Min] - mapAveDists[node1Min]);
+    distPairwiseTaxa.SetDistance(nodeIdNew, node1Min, dist1toNew);
+    distPairwiseTaxa.SetDistance(nodeIdNew, node2Min, dist2toNew);
+
+    // calc remaining distances
+    for (set<int>::iterator it = nodesToSearch.begin(); it != nodesToSearch.end(); ++it)
+    {
+        int nodecur = *it;
+        if (nodecur == node1Min || nodecur == node2Min)
+        {
+            continue;
+        }
+        double distNew = 0.5 * (distPairwiseTaxa.GetDistance(nodecur, node1Min) + distPairwiseTaxa.GetDistance(nodecur, node2Min) - distPairwiseTaxa.GetDistance(node1Min, node2Min));
+        //dist12Min);
+        distPairwiseTaxa.SetDistance(nodeIdNew, nodecur, distNew);
     }
-    double distNew = 0.5 * (distPairwiseTaxa.GetDistance(nodecur, node1Min) +
-                            distPairwiseTaxa.GetDistance(nodecur, node2Min) -
-                            distPairwiseTaxa.GetDistance(node1Min, node2Min));
-    // dist12Min);
-    distPairwiseTaxa.SetDistance(nodeIdNew, nodecur, distNew);
-  }
-
-  // maintain the search set
-  nodesToSearch.insert(nodeIdNew);
-  nodesToSearch.erase(node1Min);
-  nodesToSearch.erase(node2Min);
-
-  ngbr1 = node1Min;
-  ngbr2 = node2Min;
+
+    // maintain the search set
+    nodesToSearch.insert(nodeIdNew);
+    nodesToSearch.erase(node1Min);
+    nodesToSearch.erase(node2Min);
+
+    ngbr1 = node1Min;
+    ngbr2 = node2Min;
 }
 
-double DistanceTreeBuilder ::NJCalcAveDist(int nodecur,
-                                           const set<int> &nodesToSearch) {
-  // calc average distance from nodecur to all nodes in the search set
-  // must have at least three nodes
-  YW_ASSERT_INFO(nodesToSearch.size() >= 3, "Too few nodes");
-  // YW_ASSERT_INFO( nodesToSearch.find(nodecur) != nodesToSearch.end(),
-  // "current node must be in the set");
-  double res = 0.0;
-  for (set<int>::const_iterator it = nodesToSearch.begin();
-       it != nodesToSearch.end(); ++it) {
-    if (*it != nodecur) {
-      res += distPairwiseTaxa.GetDistance(nodecur, *it);
+double DistanceTreeBuilder ::NJCalcAveDist(int nodecur, const set<int> &nodesToSearch)
+{
+    // calc average distance from nodecur to all nodes in the search set
+    // must have at least three nodes
+    YW_ASSERT_INFO(nodesToSearch.size() >= 3, "Too few nodes");
+    //YW_ASSERT_INFO( nodesToSearch.find(nodecur) != nodesToSearch.end(), "current node must be in the set");
+    double res = 0.0;
+    for (set<int>::const_iterator it = nodesToSearch.begin(); it != nodesToSearch.end(); ++it)
+    {
+        if (*it != nodecur)
+        {
+            res += distPairwiseTaxa.GetDistance(nodecur, *it);
+        }
     }
-  }
-  return res / (nodesToSearch.size() - 2);
+    return res / (nodesToSearch.size() - 2);
 }
 
-string DistanceTreeBuilder ::GetTaxonNameFor(int index) const {
-  //
-  map<int, string>::const_iterator it = mapIndexToName.find(index);
-  if (it == mapIndexToName.end()) {
-    // juse use the index itself
-    char buf[100];
-    sprintf(buf, "%d", index);
-    string res(buf);
-    return res;
-  } else {
-    return it->second;
-  }
+string DistanceTreeBuilder ::GetTaxonNameFor(int index) const
+{
+    //
+    map<int, string>::const_iterator it = mapIndexToName.find(index);
+    if (it == mapIndexToName.end())
+    {
+        // juse use the index itself
+        char buf[100];
+        sprintf(buf, "%d", index);
+        string res(buf);
+        return res;
+    }
+    else
+    {
+        return it->second;
+    }
 }
 
 //********************************************************************************************************
 // UPGMA utilities
 
-string DistanceTreeBuilder ::ConstrainedUPGMA(
-    const set<set<int> > &setClustersMustHave,
-    const set<set<int> > &setClustersForbidden, map<set<int>, double> &mapSTHts,
-    int numTotElem) {
-  // construct UPGMA trees with constraints that exclude some clusters and must
-  // have some clusters
-  map<pair<set<int>, set<int> >, double> mapClusDist;
-  map<set<int>, pair<string, double> > mapClusSubtree; // subtree with height
-  // init all singleton
-  set<int> nodesAll;
-  distPairwiseTaxa.GetAllNodes(nodesAll);
-  // cout << "nodesAll: ";
-  // DumpIntSet(nodesAll);
-  for (set<int>::const_iterator it1 = nodesAll.begin(); it1 != nodesAll.end();
-       ++it1) {
-    set<int> ss1;
-    ss1.insert(*it1);
-    string strLeaf = std::to_string(*it1);
-    pair<string, double> sp(strLeaf, 0.0);
-    mapClusSubtree.insert(
-        map<set<int>, pair<string, double> >::value_type(ss1, sp));
-    // cout << "Process leaf: " << strLeaf << endl;
-
-    set<int>::const_iterator it2 = it1;
-    ++it2;
-    for (; it2 != nodesAll.end(); ++it2) {
-      set<int> ss2;
-      ss2.insert(*it2);
-      pair<set<int>, set<int> > ss(ss1, ss2);
-      mapClusDist.insert(map<pair<set<int>, set<int> >, double>::value_type(
-          ss, distPairwiseTaxa.GetDistance(*it1, *it2)));
-      // cout << "init pairwise distance with leaf " << *it2 << " dist=" <<
-      // distPairwiseTaxa.GetDistance(*it1, *it2) << endl;
+string DistanceTreeBuilder ::ConstrainedUPGMA(const set<set<int>> &setClustersMustHave, const set<set<int>> &setClustersForbidden, map<set<int>, double> &mapSTHts, int numTotElem)
+{
+    // construct UPGMA trees with constraints that exclude some clusters and must have some clusters
+    map<pair<set<int>, set<int>>, double> mapClusDist;
+    map<set<int>, pair<string, double>> mapClusSubtree; // subtree with height
+    // init all singleton
+    set<int> nodesAll;
+    distPairwiseTaxa.GetAllNodes(nodesAll);
+    //cout << "nodesAll: ";
+    //DumpIntSet(nodesAll);
+    for (set<int>::const_iterator it1 = nodesAll.begin(); it1 != nodesAll.end(); ++it1)
+    {
+        set<int> ss1;
+        ss1.insert(*it1);
+        string strLeaf = std::to_string(*it1);
+        pair<string, double> sp(strLeaf, 0.0);
+        mapClusSubtree.insert(map<set<int>, pair<string, double>>::value_type(ss1, sp));
+        //cout << "Process leaf: " << strLeaf << endl;
+
+        set<int>::const_iterator it2 = it1;
+        ++it2;
+        for (; it2 != nodesAll.end(); ++it2)
+        {
+            set<int> ss2;
+            ss2.insert(*it2);
+            pair<set<int>, set<int>> ss(ss1, ss2);
+            mapClusDist.insert(map<pair<set<int>, set<int>>, double>::value_type(ss, distPairwiseTaxa.GetDistance(*it1, *it2)));
+            //cout << "init pairwise distance with leaf " << *it2 << " dist=" << distPairwiseTaxa.GetDistance(*it1, *it2) << endl;
+        }
     }
-  }
-  // now start UPGMA procedure
-  while (mapClusDist.size() >= 1) {
-    // cout << "size of mapClusDist: " << mapClusDist.size() << endl;
-    // find the smallest dist
-    map<pair<set<int>, set<int> >, double>::iterator itOpt = mapClusDist.end();
-    for (map<pair<set<int>, set<int> >, double>::iterator it =
-             mapClusDist.begin();
-         it != mapClusDist.end(); ++it) {
-      // cout << "Dist=" << it->second << ", subtree1: ";
-      // DumpIntSet(it->first.first);
-      // cout << "subtree2: ";
-      // DumpIntSet(it->first.second);
-      set<int> scoal = it->first.first;
-      UnionSets(scoal, it->first.second);
-
-      if (itOpt != mapClusDist.end() && itOpt->second <= it->second) {
-        // cout << "not optimal\n";
-        continue;
-      }
-
-      bool fForbid =
-          setClustersForbidden.find(scoal) != setClustersForbidden.end();
-      if (fForbid == true) {
-        // cout << "Not allowed\n";
-        continue;
-      }
-      bool fCompat = IsClusterIncompatibleWithSetofClus(
-          scoal, setClustersMustHave, numTotElem);
-
-      if (fCompat == true) {
-        itOpt = it;
-      } else {
-        // cout << "Not compatible\n";
-      }
+    // now start UPGMA procedure
+    while (mapClusDist.size() >= 1)
+    {
+        //cout << "size of mapClusDist: " << mapClusDist.size() << endl;
+        // find the smallest dist
+        map<pair<set<int>, set<int>>, double>::iterator itOpt = mapClusDist.end();
+        for (map<pair<set<int>, set<int>>, double>::iterator it = mapClusDist.begin(); it != mapClusDist.end(); ++it)
+        {
+            //cout << "Dist=" << it->second << ", subtree1: ";
+            //DumpIntSet(it->first.first);
+            //cout << "subtree2: ";
+            //DumpIntSet(it->first.second);
+            set<int> scoal = it->first.first;
+            UnionSets(scoal, it->first.second);
+
+            if (itOpt != mapClusDist.end() && itOpt->second <= it->second)
+            {
+                //cout << "not optimal\n";
+                continue;
+            }
+
+            bool fForbid = setClustersForbidden.find(scoal) != setClustersForbidden.end();
+            if (fForbid == true)
+            {
+                //cout << "Not allowed\n";
+                continue;
+            }
+            bool fCompat = IsClusterIncompatibleWithSetofClus(scoal, setClustersMustHave, numTotElem);
+
+            if (fCompat == true)
+            {
+                itOpt = it;
+            }
+            else
+            {
+                //cout << "Not compatible\n";
+            }
+        }
+        // must find something
+        if (itOpt == mapClusDist.end())
+        {
+            YW_ASSERT_INFO(false, "Fail to construct the tree");
+        }
+        //cout << "Best pair to merge: ";
+        //DumpIntSet(itOpt->first.first);
+        //cout << " and ";
+        //DumpIntSet(itOpt->first.second);
+        // now merge the two
+        set<int> ssNew = itOpt->first.first;
+        UnionSets(ssNew, itOpt->first.second);
+        YW_ASSERT_INFO(mapClusSubtree.find(itOpt->first.first) != mapClusSubtree.end() && mapClusSubtree.find(itOpt->first.second) != mapClusSubtree.end(), "Clusters: not found");
+        double htSt1 = mapClusSubtree[itOpt->first.first].second;
+        double htSt2 = mapClusSubtree[itOpt->first.second].second;
+        double distSt1 = itOpt->second / 2 - htSt1;
+        double distSt2 = itOpt->second / 2 - htSt2;
+        //YW_ASSERT_INFO( distSt1 >= 0.0 && distSt2 >= 0.0, "Distance: should be positive" );
+        string strDist1 = std::to_string(distSt1);
+        string strDist2 = std::to_string(distSt2);
+        string strST = "(";
+        strST += mapClusSubtree[itOpt->first.first].first;
+        strST += ":";
+        strST += strDist1;
+        strST += ",";
+        strST += mapClusSubtree[itOpt->first.second].first;
+        strST += ":";
+        strST += strDist2;
+        strST += ")";
+        pair<string, double> sp2(strST, itOpt->second / 2);
+        mapClusSubtree.insert(map<set<int>, pair<string, double>>::value_type(ssNew, sp2));
+        //cout << "subtree: " << strST << ", height: " << itOpt->second/2 << ", for subtree: ";
+        //DumpIntSet( ssNew );
+        // update the distance map
+        UpdateDistUPGMA(itOpt->first, mapClusSubtree, mapClusDist);
+        //cout << "mapClusDist: size = " << mapClusDist.size() << endl;
     }
-    // must find something
-    if (itOpt == mapClusDist.end()) {
-      YW_ASSERT_INFO(false, "Fail to construct the tree");
+    //
+    YW_ASSERT_INFO(mapClusSubtree.find(nodesAll) != mapClusSubtree.end(), "Not fully constructed yet");
+    string strNWHt = mapClusSubtree[nodesAll].first;
+
+    // record subtree ht
+    mapSTHts.clear();
+    for (map<set<int>, pair<string, double>>::iterator it = mapClusSubtree.begin(); it != mapClusSubtree.end(); ++it)
+    {
+        mapSTHts.insert(map<set<int>, double>::value_type(it->first, it->second.second));
     }
-    // cout << "Best pair to merge: ";
-    // DumpIntSet(itOpt->first.first);
-    // cout << " and ";
-    // DumpIntSet(itOpt->first.second);
-    // now merge the two
-    set<int> ssNew = itOpt->first.first;
-    UnionSets(ssNew, itOpt->first.second);
-    YW_ASSERT_INFO(
-        mapClusSubtree.find(itOpt->first.first) != mapClusSubtree.end() &&
-            mapClusSubtree.find(itOpt->first.second) != mapClusSubtree.end(),
-        "Clusters: not found");
-    double htSt1 = mapClusSubtree[itOpt->first.first].second;
-    double htSt2 = mapClusSubtree[itOpt->first.second].second;
-    double distSt1 = itOpt->second / 2 - htSt1;
-    double distSt2 = itOpt->second / 2 - htSt2;
-    // YW_ASSERT_INFO( distSt1 >= 0.0 && distSt2 >= 0.0, "Distance: should be
-    // positive" );
-    string strDist1 = std::to_string(distSt1);
-    string strDist2 = std::to_string(distSt2);
-    string strST = "(";
-    strST += mapClusSubtree[itOpt->first.first].first;
-    strST += ":";
-    strST += strDist1;
-    strST += ",";
-    strST += mapClusSubtree[itOpt->first.second].first;
-    strST += ":";
-    strST += strDist2;
-    strST += ")";
-    pair<string, double> sp2(strST, itOpt->second / 2);
-    mapClusSubtree.insert(
-        map<set<int>, pair<string, double> >::value_type(ssNew, sp2));
-    // cout << "subtree: " << strST << ", height: " << itOpt->second/2 << ", for
-    // subtree: "; DumpIntSet( ssNew );
-    // update the distance map
-    UpdateDistUPGMA(itOpt->first, mapClusSubtree, mapClusDist);
-    // cout << "mapClusDist: size = " << mapClusDist.size() << endl;
-  }
-  //
-  YW_ASSERT_INFO(mapClusSubtree.find(nodesAll) != mapClusSubtree.end(),
-                 "Not fully constructed yet");
-  string strNWHt = mapClusSubtree[nodesAll].first;
-
-  // record subtree ht
-  mapSTHts.clear();
-  for (map<set<int>, pair<string, double> >::iterator it =
-           mapClusSubtree.begin();
-       it != mapClusSubtree.end(); ++it) {
-    mapSTHts.insert(
-        map<set<int>, double>::value_type(it->first, it->second.second));
-  }
-
-  // strNWHt += ":" + std::to_string( mapClusSubtree[nodesAll].second );
-  return strNWHt;
+
+    //strNWHt += ":" + std::to_string( mapClusSubtree[nodesAll].second );
+    return strNWHt;
 }
 
-bool DistanceTreeBuilder ::IsClusterIncompatible(const set<int> &clus1,
-                                                 const set<int> &clus2,
-                                                 int numTotElem) const {
-  // cout << "Clus1: ";
-  // DumpIntSet(clus1);
-  // cout << "clus2: ";
-  // DumpIntSet(clus2);
-  // four gamate test
-  set<int> sint;
-  JoinSets(clus1, clus2, sint);
-  if (sint.size() == 0) {
-    return true;
-  }
-  // set<int> sdiff1 = clus1;
-  // SubtractSets(sdiff1, clus2);
-  if (sint == clus1 || sint == clus2) {
-    return true;
-  }
-  if (numTotElem > 0) {
-    set<int> sunion = clus1;
-    UnionSets(sunion, clus2);
-    if ((int)sunion.size() == numTotElem) {
-      return true;
+bool DistanceTreeBuilder ::IsClusterIncompatible(const set<int> &clus1, const set<int> &clus2, int numTotElem) const
+{
+
+    // four gamate test
+    set<int> sint;
+    JoinSets(clus1, clus2, sint);
+    if (sint.size() == 0)
+    {
+        return true;
+    }
+    //set<int> sdiff1 = clus1;
+    //SubtractSets(sdiff1, clus2);
+    if (sint == clus1 || sint == clus2)
+    {
+        return true;
     }
-  }
-  // set<int> sdiff2 = clus2;
-  // SubtractSets(sdiff2, clus1);
-  // if( sdiff2.size() == clus1.size() )
-  //{
-  //    return true;
-  //}
-  return false;
+    if (numTotElem > 0)
+    {
+        set<int> sunion = clus1;
+        UnionSets(sunion, clus2);
+        if ((int)sunion.size() == numTotElem)
+        {
+            return true;
+        }
+    }
+
+    return false;
 }
 
-bool DistanceTreeBuilder ::IsClusterIncompatibleWithSetofClus(
-    const set<int> &clus1, const set<set<int> > &setClus,
-    int numTotElem) const {
-  //
-  for (set<set<int> >::const_iterator it = setClus.begin(); it != setClus.end();
-       ++it) {
-    if (IsClusterIncompatible(clus1, *it, numTotElem) == false) {
-      return false;
+bool DistanceTreeBuilder ::IsClusterIncompatibleWithSetofClus(const set<int> &clus1, const set<set<int>> &setClus, int numTotElem) const
+{
+    //
+    for (set<set<int>>::const_iterator it = setClus.begin(); it != setClus.end(); ++it)
+    {
+        if (IsClusterIncompatible(clus1, *it, numTotElem) == false)
+        {
+            return false;
+        }
     }
-  }
-  return true;
+    return true;
 }
 
-void DistanceTreeBuilder ::UpdateDistUPGMA(
-    const pair<set<int>, set<int> > &pairClus,
-    const map<set<int>, pair<string, double> > &mapSubtree,
-    map<pair<set<int>, set<int> >, double> &distMapCur) {
-  // remove all entries with one components as the newly merged subtree
-  map<pair<set<int>, set<int> >, double> distMapUpdated;
-  // set<set<int> > setClusCurr;
-  for (map<pair<set<int>, set<int> >, double>::iterator it = distMapCur.begin();
-       it != distMapCur.end(); ++it) {
-    if (it->first.first != pairClus.first &&
-        it->first.first != pairClus.second &&
-        it->first.second != pairClus.first &&
-        it->first.second != pairClus.second) {
-      distMapUpdated.insert(*it);
-      // setClusCurr.insert( it->first.first );
-      // setClusCurr.insert( it->first.second );
-    }
-  }
-
-  set<int> snew = pairClus.first;
-  UnionSets(snew, pairClus.second);
-  YW_ASSERT_INFO(mapSubtree.find(snew) != mapSubtree.end(), "Fail to find223");
-
-  // collect all subsets that are not done yet
-  set<set<int> > setsToProc;
-  for (map<pair<set<int>, set<int> >, double>::const_iterator it =
-           distMapCur.begin();
-       it != distMapCur.end(); ++it) {
-    setsToProc.insert(it->first.first);
-    setsToProc.insert(it->first.second);
-  }
-
-  // now update distance with the new one
-  // set< set<int> > setsDone;
-  for (set<set<int> >::const_iterator it = setsToProc.begin();
-       it != setsToProc.end(); ++it) {
-    // if( setsDone.find(*it) != setsDone.end() )
-    //{
-    //    continue;
-    //}
-    // setsDone.insert( *it );
-    // cout << "process cluster: ";
-    // DumpIntSet(*it);
-    if (*it == pairClus.first || *it == pairClus.second || *it == snew) {
-      // cout << "Skipped\n";
-      continue;
+void DistanceTreeBuilder ::UpdateDistUPGMA(const pair<set<int>, set<int>> &pairClus, const map<set<int>, pair<string, double>> &mapSubtree, map<pair<set<int>, set<int>>, double> &distMapCur)
+{
+    // remove all entries with one components as the newly merged subtree
+    map<pair<set<int>, set<int>>, double> distMapUpdated;
+    //set<set<int> > setClusCurr;
+    for (map<pair<set<int>, set<int>>, double>::iterator it = distMapCur.begin(); it != distMapCur.end(); ++it)
+    {
+        if (it->first.first != pairClus.first && it->first.first != pairClus.second && it->first.second != pairClus.first && it->first.second != pairClus.second)
+        {
+            distMapUpdated.insert(*it);
+            //setClusCurr.insert( it->first.first );
+            //setClusCurr.insert( it->first.second );
+        }
     }
 
-    set<int> s1 = snew;
-    set<int> s2 = *it;
-    if (s2 < s1) {
-      s1 = *it;
-      s2 = snew;
-    }
+    set<int> snew = pairClus.first;
+    UnionSets(snew, pairClus.second);
+    YW_ASSERT_INFO(mapSubtree.find(snew) != mapSubtree.end(), "Fail to find223");
 
-    pair<set<int>, set<int> > pp1(pairClus.first, *it);
-    if (*it < pairClus.first) {
-      pp1.first = *it;
-      pp1.second = pairClus.first;
+    // collect all subsets that are not done yet
+    set<set<int>> setsToProc;
+    for (map<pair<set<int>, set<int>>, double>::const_iterator it = distMapCur.begin(); it != distMapCur.end(); ++it)
+    {
+        setsToProc.insert(it->first.first);
+        setsToProc.insert(it->first.second);
     }
-    // cout << "pp1: ";
-    // DumpIntSet(pp1.first);
-    // DumpIntSet(pp1.second);
-    YW_ASSERT_INFO(distMapCur.find(pp1) != distMapCur.end(), "Fail to find111");
-    double htSt1 = distMapCur[pp1];
-    pair<set<int>, set<int> > pp2(pairClus.second, *it);
-    if (*it < pairClus.second) {
-      pp2.first = *it;
-      pp2.second = pairClus.second;
+
+    // now update distance with the new one
+    //set< set<int> > setsDone;
+    for (set<set<int>>::const_iterator it = setsToProc.begin(); it != setsToProc.end(); ++it)
+    {
+        //if( setsDone.find(*it) != setsDone.end() )
+        //{
+        //    continue;
+        //}
+        //setsDone.insert( *it );
+        //cout << "process cluster: ";
+        //DumpIntSet(*it);
+        if (*it == pairClus.first || *it == pairClus.second || *it == snew)
+        {
+            //cout << "Skipped\n";
+            continue;
+        }
+
+        set<int> s1 = snew;
+        set<int> s2 = *it;
+        if (s2 < s1)
+        {
+            s1 = *it;
+            s2 = snew;
+        }
+
+        pair<set<int>, set<int>> pp1(pairClus.first, *it);
+        if (*it < pairClus.first)
+        {
+            pp1.first = *it;
+            pp1.second = pairClus.first;
+        }
+        //cout << "pp1: ";
+        //DumpIntSet(pp1.first);
+        //DumpIntSet(pp1.second);
+        YW_ASSERT_INFO(distMapCur.find(pp1) != distMapCur.end(), "Fail to find111");
+        double htSt1 = distMapCur[pp1];
+        pair<set<int>, set<int>> pp2(pairClus.second, *it);
+        if (*it < pairClus.second)
+        {
+            pp2.first = *it;
+            pp2.second = pairClus.second;
+        }
+        YW_ASSERT_INFO(distMapCur.find(pp2) != distMapCur.end(), "Fail to find112");
+        double htSt2 = distMapCur[pp2];
+        double distNew = (pairClus.first.size() * htSt1 + pairClus.second.size() * htSt2) / (pairClus.first.size() + pairClus.second.size());
+        //cout << "htSt1: " << htSt1 << ", htSt2: " << htSt2 << ", distNew: " << distNew << ", for clusters: " << endl;
+        //DumpIntSet(s1);
+        //DumpIntSet(s2);
+        pair<set<int>, set<int>> sp(s1, s2);
+        distMapUpdated.insert(map<pair<set<int>, set<int>>, double>::value_type(sp, distNew));
     }
-    YW_ASSERT_INFO(distMapCur.find(pp2) != distMapCur.end(), "Fail to find112");
-    double htSt2 = distMapCur[pp2];
-    double distNew =
-        (pairClus.first.size() * htSt1 + pairClus.second.size() * htSt2) /
-        (pairClus.first.size() + pairClus.second.size());
-    // cout << "htSt1: " << htSt1 << ", htSt2: " << htSt2 << ", distNew: " <<
-    // distNew << ", for clusters: " << endl; DumpIntSet(s1); DumpIntSet(s2);
-    pair<set<int>, set<int> > sp(s1, s2);
-    distMapUpdated.insert(
-        map<pair<set<int>, set<int> >, double>::value_type(sp, distNew));
-  }
-
-  // update map
-  distMapCur = distMapUpdated;
+
+    // update map
+    distMapCur = distMapUpdated;
 }
 
-string DistanceTreeBuilder ::ConstrainedUPGMA(
-    const set<set<int> > &setClustersMustHave,
-    const set<set<int> > &setClustersDesired, int numTopCandidates,
-    const set<set<int> > &setClustersForbidden, map<set<int>, double> &mapSTHts,
-    int numTotElem) {
-  // picking the top-k candidates that matches the best of the desired splits
-  // construct UPGMA trees with constraints that exclude some clusters and must
-  // have some clusters
-  map<pair<set<int>, set<int> >, double> mapClusDist;
-  map<set<int>, pair<string, double> > mapClusSubtree; // subtree with height
-  // init all singleton
-  set<int> nodesAll;
-  distPairwiseTaxa.GetAllNodes(nodesAll);
-  // cout << "nodesAll: ";
-  // DumpIntSet(nodesAll);
-  for (set<int>::const_iterator it1 = nodesAll.begin(); it1 != nodesAll.end();
-       ++it1) {
-    set<int> ss1;
-    ss1.insert(*it1);
-    string strLeaf = std::to_string(*it1);
-    pair<string, double> sp(strLeaf, 0.0);
-    mapClusSubtree.insert(
-        map<set<int>, pair<string, double> >::value_type(ss1, sp));
-    // cout << "Process leaf: " << strLeaf << endl;
-
-    set<int>::const_iterator it2 = it1;
-    ++it2;
-    for (; it2 != nodesAll.end(); ++it2) {
-      set<int> ss2;
-      ss2.insert(*it2);
-      pair<set<int>, set<int> > ss(ss1, ss2);
-      mapClusDist.insert(map<pair<set<int>, set<int> >, double>::value_type(
-          ss, distPairwiseTaxa.GetDistance(*it1, *it2)));
-      // cout << "init pairwise distance with leaf " << *it2 << " dist=" <<
-      // distPairwiseTaxa.GetDistance(*it1, *it2) << endl;
-    }
-  }
-  // now start UPGMA procedure
-  while (mapClusDist.size() >= 1) {
-    // cout << "size of mapClusDist: " << mapClusDist.size() << endl;
-    // find the smallest dist
-    map<double, set<pair<set<int>, set<int> > > > mapScoredPairs;
-    int index = 0;
-    const double MIN_DIST_INC = 0.00000000000000000000000001;
-    for (map<pair<set<int>, set<int> >, double>::iterator it =
-             mapClusDist.begin();
-         it != mapClusDist.end(); ++it) {
-      ++index;
-      // cout << "Dist=" << it->second << ", subtree1: ";
-      // DumpIntSet(it->first.first);
-      // cout << "subtree2: ";
-      // DumpIntSet(it->first.second);
-      set<int> scoal = it->first.first;
-      UnionSets(scoal, it->first.second);
-      double distUse = it->second + index * MIN_DIST_INC;
-
-      // if( (int)mapScoredPairs.size() >= numTopCandidates &&
-      // mapScoredPairs.rbegin()->first < distUse )
-      //{
-      //    //cout << "not optimal\n";
-      //    continue;
-      //}
-
-      bool fForbid =
-          setClustersForbidden.find(scoal) != setClustersForbidden.end();
-      if (fForbid == true) {
-        // cout << "Not allowed\n";
-        continue;
-      }
-      bool fCompat = IsClusterIncompatibleWithSetofClus(
-          scoal, setClustersMustHave, numTotElem);
-
-      if (fCompat == true) {
-        // add it to the list
-        mapScoredPairs[distUse].insert(it->first);
-      } else {
-        // cout << "Not compatible\n";
-      }
+string DistanceTreeBuilder ::ConstrainedUPGMA(const set<set<int>> &setClustersMustHave, const set<set<int>> &setClustersDesired, int numTopCandidates, const set<set<int>> &setClustersForbidden, map<set<int>, double> &mapSTHts, int numTotElem)
+{
+    // picking the top-k candidates that matches the best of the desired splits
+    // construct UPGMA trees with constraints that exclude some clusters and must have some clusters
+    map<pair<set<int>, set<int>>, double> mapClusDist;
+    map<set<int>, pair<string, double>> mapClusSubtree; // subtree with height
+    // init all singleton
+    set<int> nodesAll;
+    distPairwiseTaxa.GetAllNodes(nodesAll);
+    //cout << "nodesAll: ";
+    //DumpIntSet(nodesAll);
+    for (set<int>::const_iterator it1 = nodesAll.begin(); it1 != nodesAll.end(); ++it1)
+    {
+        set<int> ss1;
+        ss1.insert(*it1);
+        string strLeaf = std::to_string(*it1);
+        pair<string, double> sp(strLeaf, 0.0);
+        mapClusSubtree.insert(map<set<int>, pair<string, double>>::value_type(ss1, sp));
+        //cout << "Process leaf: " << strLeaf << endl;
+
+        set<int>::const_iterator it2 = it1;
+        ++it2;
+        for (; it2 != nodesAll.end(); ++it2)
+        {
+            set<int> ss2;
+            ss2.insert(*it2);
+            pair<set<int>, set<int>> ss(ss1, ss2);
+            mapClusDist.insert(map<pair<set<int>, set<int>>, double>::value_type(ss, distPairwiseTaxa.GetDistance(*it1, *it2)));
+            //cout << "init pairwise distance with leaf " << *it2 << " dist=" << distPairwiseTaxa.GetDistance(*it1, *it2) << endl;
+        }
     }
-    // find the best
-    int maxDesired = -1;
-    pair<set<int>, set<int> > ppBest;
-    double htBest = 0.0;
-    YW_ASSERT_INFO(mapScoredPairs.size() > 0, "Must have some candidates");
-    const double THRES_DIST_RATIO = 1.05;
-    double minHit = mapScoredPairs.begin()->first;
-    int index2 = 0;
-    for (map<double, set<pair<set<int>, set<int> > > >::iterator it =
-             mapScoredPairs.begin();
-         it != mapScoredPairs.end(); ++it, ++index2) {
-      if (index2 >= numTopCandidates || it->first > minHit * THRES_DIST_RATIO) {
-        break;
-      }
-
-      for (set<pair<set<int>, set<int> > >::const_iterator it2 =
-               it->second.begin();
-           it2 != it->second.end(); ++it2) {
-        // cout << "Choice: ";
-        // DumpIntSet(it2->first);
-        // cout << "   with ";
-        // DumpIntSet(it2->second);
-        set<int> ssCombo = it2->first;
-        UnionSets(ssCombo, it2->second);
-
-        // YW: desired clade must have exact match
-        int numDesired = 0;
-        if (setClustersDesired.find(ssCombo) != setClustersDesired.end()) {
-          numDesired = 1;
+    // now start UPGMA procedure
+    while (mapClusDist.size() >= 1)
+    {
+        //cout << "size of mapClusDist: " << mapClusDist.size() << endl;
+        // find the smallest dist
+        map<double, set<pair<set<int>, set<int>>>> mapScoredPairs;
+        int index = 0;
+        const double MIN_DIST_INC = 0.00000000000000000000000001;
+        for (map<pair<set<int>, set<int>>, double>::iterator it = mapClusDist.begin(); it != mapClusDist.end(); ++it)
+        {
+            ++index;
+            //cout << "Dist=" << it->second << ", subtree1: ";
+            //DumpIntSet(it->first.first);
+            //cout << "subtree2: ";
+            //DumpIntSet(it->first.second);
+            set<int> scoal = it->first.first;
+            UnionSets(scoal, it->first.second);
+            double distUse = it->second + index * MIN_DIST_INC;
+
+            //if( (int)mapScoredPairs.size() >= numTopCandidates && mapScoredPairs.rbegin()->first < distUse )
+            //{
+            //    //cout << "not optimal\n";
+            //    continue;
+            //}
+
+            bool fForbid = setClustersForbidden.find(scoal) != setClustersForbidden.end();
+            if (fForbid == true)
+            {
+                //cout << "Not allowed\n";
+                continue;
+            }
+            bool fCompat = IsClusterIncompatibleWithSetofClus(scoal, setClustersMustHave, numTotElem);
+
+            if (fCompat == true)
+            {
+                // add it to the list
+                mapScoredPairs[distUse].insert(it->first);
+            }
+            else
+            {
+                //cout << "Not compatible\n";
+            }
         }
+        // find the best
+        int maxDesired = -1;
+        pair<set<int>, set<int>> ppBest;
+        double htBest = 0.0;
+        YW_ASSERT_INFO(mapScoredPairs.size() > 0, "Must have some candidates");
+        const double THRES_DIST_RATIO = 1.05;
+        double minHit = mapScoredPairs.begin()->first;
+        int index2 = 0;
+        for (map<double, set<pair<set<int>, set<int>>>>::iterator it = mapScoredPairs.begin(); it != mapScoredPairs.end(); ++it, ++index2)
+        {
+            if (index2 >= numTopCandidates || it->first > minHit * THRES_DIST_RATIO)
+            {
+                break;
+            }
 
-        // int numDesired = GetNumCompatCladesIn( ssCombo, setClustersDesired,
-        // numTotElem );
-        if (numDesired > maxDesired) {
-          maxDesired = numDesired;
-          ppBest = *it2;
-          htBest = it->first;
+            for (set<pair<set<int>, set<int>>>::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
+            {
+                //cout << "Choice: ";
+                //DumpIntSet(it2->first);
+                //cout << "   with ";
+                //DumpIntSet(it2->second);
+                set<int> ssCombo = it2->first;
+                UnionSets(ssCombo, it2->second);
+
+                // YW: desired clade must have exact match
+                int numDesired = 0;
+                if (setClustersDesired.find(ssCombo) != setClustersDesired.end())
+                {
+                    numDesired = 1;
+                }
+
+                //int numDesired = GetNumCompatCladesIn( ssCombo, setClustersDesired, numTotElem );
+                if (numDesired > maxDesired)
+                {
+                    maxDesired = numDesired;
+                    ppBest = *it2;
+                    htBest = it->first;
+                }
+                //cout << "Hitting number of desired one: " << numDesired << endl;
+            }
         }
-        // cout << "Hitting number of desired one: " << numDesired << endl;
-      }
+        //cout << "Chosen one: ";
+        //DumpIntSet(ppBest.first);
+        //cout << "    with ";
+        //DumpIntSet(ppBest.second);
+
+        //cout << "Best pair to merge: ";
+        //DumpIntSet(itOpt->first.first);
+        //cout << " and ";
+        //DumpIntSet(itOpt->first.second);
+        // now merge the two
+        set<int> ssNew = ppBest.first;
+        UnionSets(ssNew, ppBest.second);
+        YW_ASSERT_INFO(mapClusSubtree.find(ppBest.first) != mapClusSubtree.end() && mapClusSubtree.find(ppBest.second) != mapClusSubtree.end(), "Clusters: not found");
+        double htCurr = htBest;
+        double htSt1 = mapClusSubtree[ppBest.first].second;
+        double htSt2 = mapClusSubtree[ppBest.second].second;
+        double distSt1 = htBest / 2 - htSt1;
+        double distSt2 = htBest / 2 - htSt2;
+        //YW_ASSERT_INFO( distSt1 >= 0.0 && distSt2 >= 0.0, "Distance: should be positive" );
+        string strDist1 = std::to_string(distSt1);
+        string strDist2 = std::to_string(distSt2);
+        string strST = "(";
+        strST += mapClusSubtree[ppBest.first].first;
+        strST += ":";
+        strST += strDist1;
+        strST += ",";
+        strST += mapClusSubtree[ppBest.second].first;
+        strST += ":";
+        strST += strDist2;
+        strST += ")";
+        pair<string, double> sp2(strST, htCurr / 2);
+        mapClusSubtree.insert(map<set<int>, pair<string, double>>::value_type(ssNew, sp2));
+        //cout << "subtree: " << strST << ", height: " << itOpt->second/2 << ", for subtree: ";
+        //DumpIntSet( ssNew );
+        // update the distance map
+        UpdateDistUPGMA(ppBest, mapClusSubtree, mapClusDist);
+        //cout << "mapClusDist: size = " << mapClusDist.size() << endl;
     }
-    // cout << "Chosen one: ";
-    // DumpIntSet(ppBest.first);
-    // cout << "    with ";
-    // DumpIntSet(ppBest.second);
-
-    // cout << "Best pair to merge: ";
-    // DumpIntSet(itOpt->first.first);
-    // cout << " and ";
-    // DumpIntSet(itOpt->first.second);
-    // now merge the two
-    set<int> ssNew = ppBest.first;
-    UnionSets(ssNew, ppBest.second);
-    YW_ASSERT_INFO(mapClusSubtree.find(ppBest.first) != mapClusSubtree.end() &&
-                       mapClusSubtree.find(ppBest.second) !=
-                           mapClusSubtree.end(),
-                   "Clusters: not found");
-    double htCurr = htBest;
-    double htSt1 = mapClusSubtree[ppBest.first].second;
-    double htSt2 = mapClusSubtree[ppBest.second].second;
-    double distSt1 = htBest / 2 - htSt1;
-    double distSt2 = htBest / 2 - htSt2;
-    // YW_ASSERT_INFO( distSt1 >= 0.0 && distSt2 >= 0.0, "Distance: should be
-    // positive" );
-    string strDist1 = std::to_string(distSt1);
-    string strDist2 = std::to_string(distSt2);
-    string strST = "(";
-    strST += mapClusSubtree[ppBest.first].first;
-    strST += ":";
-    strST += strDist1;
-    strST += ",";
-    strST += mapClusSubtree[ppBest.second].first;
-    strST += ":";
-    strST += strDist2;
-    strST += ")";
-    pair<string, double> sp2(strST, htCurr / 2);
-    mapClusSubtree.insert(
-        map<set<int>, pair<string, double> >::value_type(ssNew, sp2));
-    // cout << "subtree: " << strST << ", height: " << itOpt->second/2 << ", for
-    // subtree: "; DumpIntSet( ssNew );
-    // update the distance map
-    UpdateDistUPGMA(ppBest, mapClusSubtree, mapClusDist);
-    // cout << "mapClusDist: size = " << mapClusDist.size() << endl;
-  }
-  //
-  YW_ASSERT_INFO(mapClusSubtree.find(nodesAll) != mapClusSubtree.end(),
-                 "Not fully constructed yet");
-  string strNWHt = mapClusSubtree[nodesAll].first;
-
-  // record subtree ht
-  mapSTHts.clear();
-  for (map<set<int>, pair<string, double> >::iterator it =
-           mapClusSubtree.begin();
-       it != mapClusSubtree.end(); ++it) {
-    mapSTHts.insert(
-        map<set<int>, double>::value_type(it->first, it->second.second));
-  }
-
-  // strNWHt += ":" + std::to_string( mapClusSubtree[nodesAll].second );
-  return strNWHt;
+    //
+    YW_ASSERT_INFO(mapClusSubtree.find(nodesAll) != mapClusSubtree.end(), "Not fully constructed yet");
+    string strNWHt = mapClusSubtree[nodesAll].first;
+
+    // record subtree ht
+    mapSTHts.clear();
+    for (map<set<int>, pair<string, double>>::iterator it = mapClusSubtree.begin(); it != mapClusSubtree.end(); ++it)
+    {
+        mapSTHts.insert(map<set<int>, double>::value_type(it->first, it->second.second));
+    }
+
+    //strNWHt += ":" + std::to_string( mapClusSubtree[nodesAll].second );
+    return strNWHt;
 }
 
-int DistanceTreeBuilder ::GetNumCompatCladesIn(
-    const set<int> &clus1, const set<set<int> > &setCladesTest,
-    int numTotElem) const {
-  //
-  int res = 0;
-  for (set<set<int> >::const_iterator it = setCladesTest.begin();
-       it != setCladesTest.end(); ++it) {
-    if (IsClusterIncompatible(clus1, *it, numTotElem) == true) {
-      ++res;
+int DistanceTreeBuilder ::GetNumCompatCladesIn(const set<int> &clus1, const set<set<int>> &setCladesTest, int numTotElem) const
+{
+    //
+    int res = 0;
+    for (set<set<int>>::const_iterator it = setCladesTest.begin(); it != setCladesTest.end(); ++it)
+    {
+        if (IsClusterIncompatible(clus1, *it, numTotElem) == true)
+        {
+            ++res;
+        }
     }
-  }
-  return res;
+    return res;
 }
 
 //***********************************************************************
 // tool for building UPGMA tree
 
-ConstrainedUPGMATreeBuilder ::ConstrainedUPGMATreeBuilder(
-    PhyloDistance &distPairwiseTaxaIn,
-    const set<set<int> > &setClustersMustHaveIn,
-    const set<set<int> > &setClustersForbiddenIn, int numTotElemIn)
-    : distPairwiseTaxa(distPairwiseTaxaIn),
-      setClustersMustHave(setClustersMustHaveIn),
-      setClustersForbidden(setClustersForbiddenIn), numTotElem(numTotElemIn) {
-  Init();
+ConstrainedUPGMATreeBuilder ::ConstrainedUPGMATreeBuilder(PhyloDistance &distPairwiseTaxaIn, const set<set<int>> &setClustersMustHaveIn, const set<set<int>> &setClustersForbiddenIn, int numTotElemIn) : distPairwiseTaxa(distPairwiseTaxaIn), setClustersMustHave(setClustersMustHaveIn), setClustersForbidden(setClustersForbiddenIn), numTotElem(numTotElemIn)
+{
+    Init();
 }
 
-ConstrainedUPGMATreeBuilder ::ConstrainedUPGMATreeBuilder(
-    const ConstrainedUPGMATreeBuilder &rhs)
-    : distPairwiseTaxa(rhs.distPairwiseTaxa),
-      setClustersMustHave(rhs.setClustersMustHave),
-      setClustersForbidden(rhs.setClustersForbidden),
-      numTotElem(rhs.numTotElem), distMapActivePair(rhs.distMapActivePair),
-      mapClusSubtree(rhs.mapClusSubtree), histSTMerge(rhs.histSTMerge) {
-  //
+ConstrainedUPGMATreeBuilder ::ConstrainedUPGMATreeBuilder(const ConstrainedUPGMATreeBuilder &rhs) : distPairwiseTaxa(rhs.distPairwiseTaxa), setClustersMustHave(rhs.setClustersMustHave), setClustersForbidden(rhs.setClustersForbidden), numTotElem(rhs.numTotElem), distMapActivePair(rhs.distMapActivePair), mapClusSubtree(rhs.mapClusSubtree), histSTMerge(rhs.histSTMerge)
+{
+    //
 }
 
-string ConstrainedUPGMATreeBuilder ::GetTree() const {
-  set<int> nodesAll;
-  distPairwiseTaxa.GetAllNodes(nodesAll);
-  map<set<int>, pair<string, double> >::const_iterator it =
-      mapClusSubtree.find(nodesAll);
-  YW_ASSERT_INFO(it != mapClusSubtree.end(), "Not fully constructed yet");
-  string strNWHt = it->second.first;
-  // strNWHt += ":" + std::to_string( mapClusSubtree[nodesAll].second );
-  return strNWHt;
+string ConstrainedUPGMATreeBuilder ::GetTree() const
+{
+    set<int> nodesAll;
+    distPairwiseTaxa.GetAllNodes(nodesAll);
+    map<set<int>, pair<string, double>>::const_iterator it = mapClusSubtree.find(nodesAll);
+    YW_ASSERT_INFO(it != mapClusSubtree.end(), "Not fully constructed yet");
+    string strNWHt = it->second.first;
+    //strNWHt += ":" + std::to_string( mapClusSubtree[nodesAll].second );
+    return strNWHt;
 }
 
-string ConstrainedUPGMATreeBuilder ::GetPartialConsTree() const {
-  // get partially constructed tree for now; only consider those merged; that
-  // is, if nothing occur, empty
-  map<set<int>, string> mapSTs;
-  //
-  for (int i = 0; i < (int)histSTMerge.size(); ++i) {
-    //
-    map<set<int>, string>::iterator it1 = mapSTs.find(histSTMerge[i].first);
-    map<set<int>, string>::iterator it2 = mapSTs.find(histSTMerge[i].second);
-
+string ConstrainedUPGMATreeBuilder ::GetPartialConsTree() const
+{
+    // get partially constructed tree for now; only consider those merged; that is,
+    // if nothing occur, empty
+    map<set<int>, string> mapSTs;
     //
-    string strLeft, strRight;
-    if (it1 == mapSTs.end()) {
-      YW_ASSERT_INFO(histSTMerge[i].first.size() == 1, "Singleton");
-      char buf[10000];
-      sprintf(buf, "%d", *histSTMerge[i].first.begin());
-      strLeft = buf;
-    } else {
-      strLeft = it1->second;
-    }
-    if (it2 == mapSTs.end()) {
-      YW_ASSERT_INFO(histSTMerge[i].second.size() == 1, "Singleton");
-      char buf[10000];
-      sprintf(buf, "%d", *histSTMerge[i].second.begin());
-      strRight = buf;
-    } else {
-      strRight = it2->second;
-    }
-    string strLeftUse = strLeft;
-    string strRightUse = strRight;
-    if (strRight < strLeft) {
-      strLeftUse = strRight;
-      strRightUse = strLeft;
-    }
+    for (int i = 0; i < (int)histSTMerge.size(); ++i)
+    {
+        //
+        map<set<int>, string>::iterator it1 = mapSTs.find(histSTMerge[i].first);
+        map<set<int>, string>::iterator it2 = mapSTs.find(histSTMerge[i].second);
+
+        //
+        string strLeft, strRight;
+        if (it1 == mapSTs.end())
+        {
+            YW_ASSERT_INFO(histSTMerge[i].first.size() == 1, "Singleton");
+            char buf[10000];
+            sprintf(buf, "%d", *histSTMerge[i].first.begin());
+            strLeft = buf;
+        }
+        else
+        {
+            strLeft = it1->second;
+        }
+        if (it2 == mapSTs.end())
+        {
+            YW_ASSERT_INFO(histSTMerge[i].second.size() == 1, "Singleton");
+            char buf[10000];
+            sprintf(buf, "%d", *histSTMerge[i].second.begin());
+            strRight = buf;
+        }
+        else
+        {
+            strRight = it2->second;
+        }
+        string strLeftUse = strLeft;
+        string strRightUse = strRight;
+        if (strRight < strLeft)
+        {
+            strLeftUse = strRight;
+            strRightUse = strLeft;
+        }
 
-    string strMerge = "(" + strLeftUse + "," + strRightUse + ")";
-    set<int> ss = histSTMerge[i].first;
-    UnionSets(ss, histSTMerge[i].second);
-    mapSTs[ss] = strMerge;
-    if (it1 != mapSTs.end()) {
-      mapSTs.erase(it1);
-    }
-    if (it2 != mapSTs.end()) {
-      mapSTs.erase(it2);
+        string strMerge = "(" + strLeftUse + "," + strRightUse + ")";
+        set<int> ss = histSTMerge[i].first;
+        UnionSets(ss, histSTMerge[i].second);
+        mapSTs[ss] = strMerge;
+        if (it1 != mapSTs.end())
+        {
+            mapSTs.erase(it1);
+        }
+        if (it2 != mapSTs.end())
+        {
+            mapSTs.erase(it2);
+        }
     }
-  }
-  // result is concatnation of all the remaining stuff
-  string res = "(";
-  for (map<set<int>, string>::iterator it = mapSTs.begin(); it != mapSTs.end();
-       ++it) {
-    if (it != mapSTs.begin()) {
-      res += ",";
+    // result is concatnation of all the remaining stuff
+    string res = "(";
+    for (map<set<int>, string>::iterator it = mapSTs.begin(); it != mapSTs.end(); ++it)
+    {
+        if (it != mapSTs.begin())
+        {
+            res += ",";
+        }
+        res += it->second;
     }
-    res += it->second;
-  }
-  res += ")";
+    res += ")";
 
-  return res;
+    return res;
 }
 
-double ConstrainedUPGMATreeBuilder ::GetMinCoalSubtrees(set<int> &st1,
-                                                        set<int> &st2) const {
-  // cout << "*GetMinCoalSubtrees\n";
-  map<pair<set<int>, set<int> >, double>::const_iterator itOpt =
-      distMapActivePair.end();
-  for (map<pair<set<int>, set<int> >, double>::const_iterator it =
-           distMapActivePair.begin();
-       it != distMapActivePair.end(); ++it) {
-    // cout << "Dist=" << it->second << ", subtree1: ";
-    // DumpIntSet(it->first.first);
-    // cout << "subtree2: ";
-    // DumpIntSet(it->first.second);
-    set<int> scoal = it->first.first;
-    UnionSets(scoal, it->first.second);
-
-    if (itOpt != distMapActivePair.end() && itOpt->second <= it->second) {
-      // cout << "not optimal\n";
-      continue;
-    }
+double ConstrainedUPGMATreeBuilder ::GetMinCoalSubtrees(set<int> &st1, set<int> &st2) const
+{
+    //cout << "*GetMinCoalSubtrees\n";
+    map<pair<set<int>, set<int>>, double>::const_iterator itOpt = distMapActivePair.end();
+    for (map<pair<set<int>, set<int>>, double>::const_iterator it = distMapActivePair.begin(); it != distMapActivePair.end(); ++it)
+    {
+        //cout << "Dist=" << it->second << ", subtree1: ";
+        //DumpIntSet(it->first.first);
+        //cout << "subtree2: ";
+        //DumpIntSet(it->first.second);
+        set<int> scoal = it->first.first;
+        UnionSets(scoal, it->first.second);
+
+        if (itOpt != distMapActivePair.end() && itOpt->second <= it->second)
+        {
+            //cout << "not optimal\n";
+            continue;
+        }
 
-    bool fForbid =
-        setClustersForbidden.find(scoal) != setClustersForbidden.end();
-    if (fForbid == true) {
-      // cout << "Not allowed: forbidden\n";
-      continue;
-    }
-    bool fCompat =
-        IsClusterIncompatibleWithSetofClus(scoal, setClustersMustHave);
+        bool fForbid = setClustersForbidden.find(scoal) != setClustersForbidden.end();
+        if (fForbid == true)
+        {
+            //cout << "Not allowed: forbidden\n";
+            continue;
+        }
+        bool fCompat = IsClusterIncompatibleWithSetofClus(scoal, setClustersMustHave);
+
+        if (fCompat == false)
+        {
+            //cout << "Not allowed: incomaptible\n";
+            continue;
+        }
 
-    if (fCompat == false) {
-      // cout << "Not allowed: incomaptible\n";
-      continue;
+        //
+        itOpt = it;
+    }
+    // must find something
+    if (itOpt == distMapActivePair.end())
+    {
+        YW_ASSERT_INFO(false, "Fail to construct the tree");
     }
+    //cout << "here..\n";
+    st1 = itOpt->first.first;
+    st2 = itOpt->first.second;
+    //cout << "Min dist: " << itOpt->second << ", subtrees: ";
+    //DumpIntSet(st1);
+    //cout << "  and ";
+    //DumpIntSet(st2);
+    return itOpt->second;
+}
 
+void ConstrainedUPGMATreeBuilder ::GetCoalSubtreesHtBound(double htBound, set<pair<pair<set<int>, set<int>>, double>> &setCandidates) const
+{
     //
-    itOpt = it;
-  }
-  // must find something
-  if (itOpt == distMapActivePair.end()) {
-    YW_ASSERT_INFO(false, "Fail to construct the tree");
-  }
-  // cout << "here..\n";
-  st1 = itOpt->first.first;
-  st2 = itOpt->first.second;
-  // cout << "Min dist: " << itOpt->second << ", subtrees: ";
-  // DumpIntSet(st1);
-  // cout << "  and ";
-  // DumpIntSet(st2);
-  return itOpt->second;
-}
+    for (map<pair<set<int>, set<int>>, double>::const_iterator it = distMapActivePair.begin(); it != distMapActivePair.end(); ++it)
+    {
+        //cout << "Dist=" << it->second << ", subtree1: ";
+        //DumpIntSet(it->first.first);
+        //cout << "subtree2: ";
+        //DumpIntSet(it->first.second);
+
+        if (it->second > htBound)
+        {
+            //cout << "not optimal\n";
+            continue;
+        }
 
-void ConstrainedUPGMATreeBuilder ::GetCoalSubtreesHtBound(
-    double htBound,
-    set<pair<pair<set<int>, set<int> >, double> > &setCandidates) const {
-  //
-  for (map<pair<set<int>, set<int> >, double>::const_iterator it =
-           distMapActivePair.begin();
-       it != distMapActivePair.end(); ++it) {
-    // cout << "Dist=" << it->second << ", subtree1: ";
-    // DumpIntSet(it->first.first);
-    // cout << "subtree2: ";
-    // DumpIntSet(it->first.second);
-
-    if (it->second > htBound) {
-      // cout << "not optimal\n";
-      continue;
-    }
+        set<int> scoal = it->first.first;
+        UnionSets(scoal, it->first.second);
 
-    set<int> scoal = it->first.first;
-    UnionSets(scoal, it->first.second);
+        bool fForbid = setClustersForbidden.find(scoal) != setClustersForbidden.end();
+        if (fForbid == true)
+        {
+            //cout << "Not allowed\n";
+            continue;
+        }
+        bool fCompat = IsClusterIncompatibleWithSetofClus(scoal, setClustersMustHave);
 
-    bool fForbid =
-        setClustersForbidden.find(scoal) != setClustersForbidden.end();
-    if (fForbid == true) {
-      // cout << "Not allowed\n";
-      continue;
-    }
-    bool fCompat =
-        IsClusterIncompatibleWithSetofClus(scoal, setClustersMustHave);
+        if (fCompat == false)
+        {
+            continue;
+        }
 
-    if (fCompat == false) {
-      continue;
+        //
+        pair<pair<set<int>, set<int>>, double> pp;
+        pp.first = it->first;
+        pp.second = it->second;
+        setCandidates.insert(pp);
     }
-
-    //
-    pair<pair<set<int>, set<int> >, double> pp;
-    pp.first = it->first;
-    pp.second = it->second;
-    setCandidates.insert(pp);
-  }
 }
 
-void ConstrainedUPGMATreeBuilder ::MergeSubtrees(const set<int> &st1,
-                                                 const set<int> &st2,
-                                                 double htMergedST) {
-  // now merge the two
-  set<int> ssNew = st1;
-  UnionSets(ssNew, st2);
-  YW_ASSERT_INFO(mapClusSubtree.find(st1) != mapClusSubtree.end() &&
-                     mapClusSubtree.find(st2) != mapClusSubtree.end(),
-                 "Clusters: not found");
-  double htSt1 = mapClusSubtree[st1].second;
-  double htSt2 = mapClusSubtree[st2].second;
-  double distSt1 = htMergedST / 2 - htSt1;
-  double distSt2 = htMergedST / 2 - htSt2;
-  // YW_ASSERT_INFO( distSt1 >= 0.0 && distSt2 >= 0.0, "Distance: should be
-  // positive" );
-  string strDist1 = std::to_string(distSt1);
-  string strDist2 = std::to_string(distSt2);
-  string strST = "(";
-  strST += mapClusSubtree[st1].first;
-  strST += ":";
-  strST += strDist1;
-  strST += ",";
-  strST += mapClusSubtree[st2].first;
-  strST += ":";
-  strST += strDist2;
-  strST += ")";
-  double distSet = htMergedST / 2;
-  pair<string, double> sp2(strST, distSet);
-  mapClusSubtree.insert(
-      map<set<int>, pair<string, double> >::value_type(ssNew, sp2));
-  // cout << "MergeSubtrees: subtree: " << strST << ", height: " << htMergedST
-  // << ", for subtree: "; DumpIntSet( ssNew );
-  // update the distance map
-  UpdateDistUPGMA(st1, st2);
-
-  pair<set<int>, set<int> > pp(st1, st2);
-  histSTMerge.push_back(pp);
+void ConstrainedUPGMATreeBuilder ::MergeSubtrees(const set<int> &st1, const set<int> &st2, double htMergedST)
+{
+    // now merge the two
+    set<int> ssNew = st1;
+    UnionSets(ssNew, st2);
+    YW_ASSERT_INFO(mapClusSubtree.find(st1) != mapClusSubtree.end() && mapClusSubtree.find(st2) != mapClusSubtree.end(), "Clusters: not found");
+    double htSt1 = mapClusSubtree[st1].second;
+    double htSt2 = mapClusSubtree[st2].second;
+    double distSt1 = htMergedST / 2 - htSt1;
+    double distSt2 = htMergedST / 2 - htSt2;
+    //YW_ASSERT_INFO( distSt1 >= 0.0 && distSt2 >= 0.0, "Distance: should be positive" );
+    string strDist1 = std::to_string(distSt1);
+    string strDist2 = std::to_string(distSt2);
+    string strST = "(";
+    strST += mapClusSubtree[st1].first;
+    strST += ":";
+    strST += strDist1;
+    strST += ",";
+    strST += mapClusSubtree[st2].first;
+    strST += ":";
+    strST += strDist2;
+    strST += ")";
+    double distSet = htMergedST / 2;
+    pair<string, double> sp2(strST, distSet);
+    mapClusSubtree.insert(map<set<int>, pair<string, double>>::value_type(ssNew, sp2));
+    //cout << "MergeSubtrees: subtree: " << strST << ", height: " << htMergedST << ", for subtree: ";
+    //DumpIntSet( ssNew );
+    // update the distance map
+    UpdateDistUPGMA(st1, st2);
+
+    pair<set<int>, set<int>> pp(st1, st2);
+    histSTMerge.push_back(pp);
 }
 
-void ConstrainedUPGMATreeBuilder ::GetMergeCandidates(
-    map<pair<set<int>, set<int> >, double> &setCandidates) const {
-  setCandidates.clear();
-  for (map<pair<set<int>, set<int> >, double>::const_iterator it =
-           distMapActivePair.begin();
-       it != distMapActivePair.end(); ++it) {
-    // cout << "GetMergeCandidates: candidate clades: ";
-    // DumpIntSet(it->first.first);
-    // cout << "  ";
-    // DumpIntSet(it->first.second);
-    set<int> scoal = it->first.first;
-    UnionSets(scoal, it->first.second);
-    bool fForbid =
-        setClustersForbidden.find(scoal) != setClustersForbidden.end();
-    if (fForbid == true) {
-      // cout << "Not allowed\n";
-      continue;
-    }
-    // cout << "socal: not forbidden\n";
-    // DumpIntSet(scoal);
-    bool fCompat =
-        IsClusterIncompatibleWithSetofClus(scoal, setClustersMustHave);
+void ConstrainedUPGMATreeBuilder ::GetMergeCandidates(map<pair<set<int>, set<int>>, double> &setCandidates) const
+{
+    setCandidates.clear();
+    for (map<pair<set<int>, set<int>>, double>::const_iterator it = distMapActivePair.begin(); it != distMapActivePair.end(); ++it)
+    {
+        //cout << "GetMergeCandidates: candidate clades: ";
+        //DumpIntSet(it->first.first);
+        //cout << "  ";
+        //DumpIntSet(it->first.second);
+        set<int> scoal = it->first.first;
+        UnionSets(scoal, it->first.second);
+        bool fForbid = setClustersForbidden.find(scoal) != setClustersForbidden.end();
+        if (fForbid == true)
+        {
+            //cout << "Not allowed\n";
+            continue;
+        }
+        //cout << "socal: not forbidden\n";
+        //DumpIntSet(scoal);
+        bool fCompat = IsClusterIncompatibleWithSetofClus(scoal, setClustersMustHave);
 
-    if (fCompat == false) {
-      continue;
+        if (fCompat == false)
+        {
+            continue;
+        }
+        //cout << "A good candidate: ";
+        //DumpIntSet(it->first.first);
+        //cout << "  ";
+        //DumpIntSet(it->first.second);
+
+        setCandidates.insert(map<pair<set<int>, set<int>>, double>::value_type(it->first, it->second));
     }
-    // cout << "A good candidate: ";
-    // DumpIntSet(it->first.first);
-    // cout << "  ";
-    // DumpIntSet(it->first.second);
-
-    setCandidates.insert(map<pair<set<int>, set<int> >, double>::value_type(
-        it->first, it->second));
-  }
-  // cout << "Done: GetMergeCandidates\n";
+    //cout << "Done: GetMergeCandidates\n";
 }
 
-double ConstrainedUPGMATreeBuilder ::GetCurDistForTwoClusters(
-    const set<int> &clus1, const set<int> &clus2) const {
-  //
-  pair<set<int>, set<int> > ss(clus1, clus2);
-  map<pair<set<int>, set<int> >, double>::const_iterator it =
-      distMapActivePair.find(ss);
-  YW_ASSERT_INFO(it != distMapActivePair.end(), "Fail to find");
-  return it->second;
+double ConstrainedUPGMATreeBuilder ::GetCurDistForTwoClusters(const set<int> &clus1, const set<int> &clus2) const
+{
+    //
+    pair<set<int>, set<int>> ss(clus1, clus2);
+    map<pair<set<int>, set<int>>, double>::const_iterator it = distMapActivePair.find(ss);
+    YW_ASSERT_INFO(it != distMapActivePair.end(), "Fail to find");
+    return it->second;
 }
 
-void ConstrainedUPGMATreeBuilder ::SetDistForTwoClusters(const set<int> &clus1,
-                                                         const set<int> &clus2,
-                                                         double dist) {
-  pair<set<int>, set<int> > ss(clus1, clus2);
-  map<pair<set<int>, set<int> >, double>::const_iterator it =
-      distMapActivePair.find(ss);
-  YW_ASSERT_INFO(it != distMapActivePair.end(), "Fail to find");
-  distMapActivePair[ss] = dist;
+void ConstrainedUPGMATreeBuilder ::SetDistForTwoClusters(const set<int> &clus1, const set<int> &clus2, double dist)
+{
+    pair<set<int>, set<int>> ss(clus1, clus2);
+    map<pair<set<int>, set<int>>, double>::const_iterator it = distMapActivePair.find(ss);
+    YW_ASSERT_INFO(it != distMapActivePair.end(), "Fail to find");
+    distMapActivePair[ss] = dist;
 }
 
-bool ConstrainedUPGMATreeBuilder ::IsDone() const {
-  return distMapActivePair.size() == 0;
+bool ConstrainedUPGMATreeBuilder ::IsDone() const
+{
+    return distMapActivePair.size() == 0;
 }
 
-void ConstrainedUPGMATreeBuilder ::Init() {
-  set<int> nodesAll;
-  distPairwiseTaxa.GetAllNodes(nodesAll);
-  // cout << "nodesAll: ";
-  // DumpIntSet(nodesAll);
-  for (set<int>::const_iterator it1 = nodesAll.begin(); it1 != nodesAll.end();
-       ++it1) {
-    set<int> ss1;
-    ss1.insert(*it1);
-    string strLeaf = std::to_string(*it1);
-    pair<string, double> sp(strLeaf, 0.0);
-    mapClusSubtree.insert(
-        map<set<int>, pair<string, double> >::value_type(ss1, sp));
-    // cout << "Process leaf: " << strLeaf << endl;
-
-    set<int>::const_iterator it2 = it1;
-    ++it2;
-    for (; it2 != nodesAll.end(); ++it2) {
-      set<int> ss2;
-      ss2.insert(*it2);
-
-      pair<set<int>, set<int> > ss(ss1, ss2);
-      distMapActivePair.insert(
-          map<pair<set<int>, set<int> >, double>::value_type(
-              ss, distPairwiseTaxa.GetDistance(*it1, *it2)));
-      // cout << "init pairwise distance with leaf " << *it2 << " dist=" <<
-      // distPairwiseTaxa.GetDistance(*it1, *it2) << endl;
-    }
-  }
-}
+void ConstrainedUPGMATreeBuilder ::Init()
+{
+    set<int> nodesAll;
+    distPairwiseTaxa.GetAllNodes(nodesAll);
+    //cout << "nodesAll: ";
+    //DumpIntSet(nodesAll);
+    for (set<int>::const_iterator it1 = nodesAll.begin(); it1 != nodesAll.end(); ++it1)
+    {
+        set<int> ss1;
+        ss1.insert(*it1);
+        string strLeaf = std::to_string(*it1);
+        pair<string, double> sp(strLeaf, 0.0);
+        mapClusSubtree.insert(map<set<int>, pair<string, double>>::value_type(ss1, sp));
+        //cout << "Process leaf: " << strLeaf << endl;
+
+        set<int>::const_iterator it2 = it1;
+        ++it2;
+        for (; it2 != nodesAll.end(); ++it2)
+        {
+            set<int> ss2;
+            ss2.insert(*it2);
 
-bool ConstrainedUPGMATreeBuilder ::IsClusterIncompatible(
-    const set<int> &clus1, const set<int> &clus2) const {
-  // cout << "Clus1: ";
-  // DumpIntSet(clus1);
-  // cout << "clus2: ";
-  // DumpIntSet(clus2);
-  // four gamate test
-  set<int> sint;
-  JoinSets(clus1, clus2, sint);
-  if (sint.size() == 0) {
-    return true;
-  }
-  // set<int> sdiff1 = clus1;
-  // SubtractSets(sdiff1, clus2);
-  if (sint == clus1 || sint == clus2) {
-    return true;
-  }
-  // set<int> sdiff2 = clus2;
-  // SubtractSets(sdiff2, clus1);
-  // if( sdiff2.size() == clus1.size() )
-  //{
-  //    return true;
-  //}
-  if (this->numTotElem > 0) {
-    set<int> sunion = clus1;
-    UnionSets(sunion, clus2);
-    if ((int)sunion.size() == numTotElem) {
-      return true;
+            pair<set<int>, set<int>> ss(ss1, ss2);
+            distMapActivePair.insert(map<pair<set<int>, set<int>>, double>::value_type(ss, distPairwiseTaxa.GetDistance(*it1, *it2)));
+            //cout << "init pairwise distance with leaf " << *it2 << " dist=" << distPairwiseTaxa.GetDistance(*it1, *it2) << endl;
+        }
     }
-  }
-  return false;
 }
 
-bool ConstrainedUPGMATreeBuilder ::IsClusterIncompatibleWithSetofClus(
-    const set<int> &clus1, const set<set<int> > &setClus) const {
-  //
-  for (set<set<int> >::const_iterator it = setClus.begin(); it != setClus.end();
-       ++it) {
-    if (IsClusterIncompatible(clus1, *it) == false) {
-      return false;
+bool ConstrainedUPGMATreeBuilder ::IsClusterIncompatible(const set<int> &clus1, const set<int> &clus2) const
+{
+    //cout << "Clus1: ";
+    //DumpIntSet(clus1);
+    //cout << "clus2: ";
+    //DumpIntSet(clus2);
+    // four gamate test
+    set<int> sint;
+    JoinSets(clus1, clus2, sint);
+    if (sint.size() == 0)
+    {
+        return true;
     }
-  }
-  return true;
-}
-
-void ConstrainedUPGMATreeBuilder ::UpdateDistUPGMA(const set<int> &st1,
-                                                   const set<int> &st2) {
-  // remove all entries with one components as the newly merged subtree
-  map<pair<set<int>, set<int> >, double> distMapUpdated;
-  // set<set<int> > setClusCurr;
-  for (map<pair<set<int>, set<int> >, double>::iterator it =
-           distMapActivePair.begin();
-       it != distMapActivePair.end(); ++it) {
-    if (it->first.first != st1 && it->first.first != st2 &&
-        it->first.second != st1 && it->first.second != st2) {
-      distMapUpdated.insert(*it);
-      // setClusCurr.insert( it->first.first );
-      // setClusCurr.insert( it->first.second );
+    //set<int> sdiff1 = clus1;
+    //SubtractSets(sdiff1, clus2);
+    if (sint == clus1 || sint == clus2)
+    {
+        return true;
     }
-  }
-
-  set<int> snew = st1;
-  UnionSets(snew, st2);
-  YW_ASSERT_INFO(mapClusSubtree.find(snew) != mapClusSubtree.end(),
-                 "Fail to find223");
-
-  // collect all subsets that are not done yet
-  set<set<int> > setsToProc;
-  for (map<pair<set<int>, set<int> >, double>::const_iterator it =
-           distMapActivePair.begin();
-       it != distMapActivePair.end(); ++it) {
-    setsToProc.insert(it->first.first);
-    setsToProc.insert(it->first.second);
-  }
-
-  // now update distance with the new one
-  // set< set<int> > setsDone;
-  for (set<set<int> >::const_iterator it = setsToProc.begin();
-       it != setsToProc.end(); ++it) {
-    // if( setsDone.find(*it) != setsDone.end() )
+    //set<int> sdiff2 = clus2;
+    //SubtractSets(sdiff2, clus1);
+    //if( sdiff2.size() == clus1.size() )
     //{
-    //    continue;
+    //    return true;
     //}
-    // setsDone.insert( *it );
-    // cout << "process cluster: ";
-    // DumpIntSet(*it);
-    if (*it == st1 || *it == st2 || *it == snew) {
-      // cout << "Skipped\n";
-      continue;
+    if (this->numTotElem > 0)
+    {
+        set<int> sunion = clus1;
+        UnionSets(sunion, clus2);
+        if ((int)sunion.size() == numTotElem)
+        {
+            return true;
+        }
     }
+    return false;
+}
 
-    // make sure this is allowed
-    // set<int> scoal = snew;
-    // UnionSets( scoal, *it);
-    // bool fForbid = setClustersForbidden.find(scoal) !=
-    // setClustersForbidden.end(); if( fForbid == true )
-    //{
-    //    //cout << "Not allowed\n";
-    //    continue;
-    //}
-    // bool fCompat = IsClusterIncompatibleWithSetofClus( scoal,
-    // setClustersMustHave );
+bool ConstrainedUPGMATreeBuilder ::IsClusterIncompatibleWithSetofClus(const set<int> &clus1, const set<set<int>> &setClus) const
+{
     //
-    // if( fCompat == false)
-    //{
-    //    continue;
-    //}
+    for (set<set<int>>::const_iterator it = setClus.begin(); it != setClus.end(); ++it)
+    {
+        if (IsClusterIncompatible(clus1, *it) == false)
+        {
+            return false;
+        }
+    }
+    return true;
+}
 
-    set<int> s1 = snew;
-    set<int> s2 = *it;
-    if (s2 < s1) {
-      s1 = *it;
-      s2 = snew;
+void ConstrainedUPGMATreeBuilder ::UpdateDistUPGMA(const set<int> &st1, const set<int> &st2)
+{
+    // remove all entries with one components as the newly merged subtree
+    map<pair<set<int>, set<int>>, double> distMapUpdated;
+    //set<set<int> > setClusCurr;
+    for (map<pair<set<int>, set<int>>, double>::iterator it = distMapActivePair.begin(); it != distMapActivePair.end(); ++it)
+    {
+        if (it->first.first != st1 && it->first.first != st2 && it->first.second != st1 && it->first.second != st2)
+        {
+            distMapUpdated.insert(*it);
+            //setClusCurr.insert( it->first.first );
+            //setClusCurr.insert( it->first.second );
+        }
     }
 
-    pair<set<int>, set<int> > pp1(st1, *it);
-    if (*it < st1) {
-      pp1.first = *it;
-      pp1.second = st1;
+    set<int> snew = st1;
+    UnionSets(snew, st2);
+    YW_ASSERT_INFO(mapClusSubtree.find(snew) != mapClusSubtree.end(), "Fail to find223");
+
+    // collect all subsets that are not done yet
+    set<set<int>> setsToProc;
+    for (map<pair<set<int>, set<int>>, double>::const_iterator it = distMapActivePair.begin(); it != distMapActivePair.end(); ++it)
+    {
+        setsToProc.insert(it->first.first);
+        setsToProc.insert(it->first.second);
     }
-    // cout << "pp1: ";
-    // DumpIntSet(pp1.first);
-    // DumpIntSet(pp1.second);
-    YW_ASSERT_INFO(distMapActivePair.find(pp1) != distMapActivePair.end(),
-                   "Fail to find111");
-    double htSt1 = distMapActivePair[pp1];
-    pair<set<int>, set<int> > pp2(st2, *it);
-    if (*it < st2) {
-      pp2.first = *it;
-      pp2.second = st2;
+
+    // now update distance with the new one
+    //set< set<int> > setsDone;
+    for (set<set<int>>::const_iterator it = setsToProc.begin(); it != setsToProc.end(); ++it)
+    {
+        //if( setsDone.find(*it) != setsDone.end() )
+        //{
+        //    continue;
+        //}
+        //setsDone.insert( *it );
+        //cout << "process cluster: ";
+        //DumpIntSet(*it);
+        if (*it == st1 || *it == st2 || *it == snew)
+        {
+            //cout << "Skipped\n";
+            continue;
+        }
+
+        // make sure this is allowed
+        //set<int> scoal = snew;
+        //UnionSets( scoal, *it);
+        //bool fForbid = setClustersForbidden.find(scoal) != setClustersForbidden.end();
+        //if( fForbid == true )
+        //{
+        //    //cout << "Not allowed\n";
+        //    continue;
+        //}
+        //bool fCompat = IsClusterIncompatibleWithSetofClus( scoal, setClustersMustHave );
+        //
+        //if( fCompat == false)
+        //{
+        //    continue;
+        //}
+
+        set<int> s1 = snew;
+        set<int> s2 = *it;
+        if (s2 < s1)
+        {
+            s1 = *it;
+            s2 = snew;
+        }
+
+        pair<set<int>, set<int>> pp1(st1, *it);
+        if (*it < st1)
+        {
+            pp1.first = *it;
+            pp1.second = st1;
+        }
+        //cout << "pp1: ";
+        //DumpIntSet(pp1.first);
+        //DumpIntSet(pp1.second);
+        YW_ASSERT_INFO(distMapActivePair.find(pp1) != distMapActivePair.end(), "Fail to find111");
+        double htSt1 = distMapActivePair[pp1];
+        pair<set<int>, set<int>> pp2(st2, *it);
+        if (*it < st2)
+        {
+            pp2.first = *it;
+            pp2.second = st2;
+        }
+        YW_ASSERT_INFO(distMapActivePair.find(pp2) != distMapActivePair.end(), "Fail to find112");
+        double htSt2 = distMapActivePair[pp2];
+        double distNew = (st1.size() * htSt1 + st2.size() * htSt2) / (st1.size() + st2.size());
+        //cout << "htSt1: " << htSt1 << ", htSt2: " << htSt2 << ", distNew: " << distNew << ", for clusters: " << endl;
+        //DumpIntSet(s1);
+        //DumpIntSet(s2);
+        pair<set<int>, set<int>> sp(s1, s2);
+        distMapUpdated.insert(map<pair<set<int>, set<int>>, double>::value_type(sp, distNew));
     }
-    YW_ASSERT_INFO(distMapActivePair.find(pp2) != distMapActivePair.end(),
-                   "Fail to find112");
-    double htSt2 = distMapActivePair[pp2];
-    double distNew =
-        (st1.size() * htSt1 + st2.size() * htSt2) / (st1.size() + st2.size());
-    // cout << "htSt1: " << htSt1 << ", htSt2: " << htSt2 << ", distNew: " <<
-    // distNew << ", for clusters: " << endl; DumpIntSet(s1); DumpIntSet(s2);
-    pair<set<int>, set<int> > sp(s1, s2);
-    distMapUpdated.insert(
-        map<pair<set<int>, set<int> >, double>::value_type(sp, distNew));
-  }
-
-  // update map
-  distMapActivePair = distMapUpdated;
-
-  // cout <<"After update, ";
-  // Dump();
+
+    // update map
+    distMapActivePair = distMapUpdated;
+
+    //cout <<"After update, ";
+    //Dump();
 }
 
-int ConstrainedUPGMATreeBuilder ::GetNumSubtrees() const {
-  //
-  return mapClusSubtree.size();
+int ConstrainedUPGMATreeBuilder ::GetNumSubtrees() const
+{
+    //
+    return mapClusSubtree.size();
 }
 
-void ConstrainedUPGMATreeBuilder ::GetAllSubtrees(
-    map<set<int>, string> &mapSTs) const {
-  //
-  mapSTs.clear();
-  for (map<set<int>, pair<string, double> >::const_iterator it =
-           mapClusSubtree.begin();
-       it != mapClusSubtree.end(); ++it) {
+void ConstrainedUPGMATreeBuilder ::GetAllSubtrees(map<set<int>, string> &mapSTs) const
+{
     //
-    mapSTs.insert(
-        map<set<int>, string>::value_type(it->first, it->second.first));
-  }
+    mapSTs.clear();
+    for (map<set<int>, pair<string, double>>::const_iterator it = mapClusSubtree.begin(); it != mapClusSubtree.end(); ++it)
+    {
+        //
+        mapSTs.insert(map<set<int>, string>::value_type(it->first, it->second.first));
+    }
 }
 
-void ConstrainedUPGMATreeBuilder ::GetActiveSubtrees(
-    set<set<int> > &setActiveSTs) const {
-  //
-  for (map<pair<set<int>, set<int> >, double>::const_iterator it =
-           distMapActivePair.begin();
-       it != distMapActivePair.end(); ++it) {
-    setActiveSTs.insert(it->first.first);
-    setActiveSTs.insert(it->first.second);
-  }
+void ConstrainedUPGMATreeBuilder ::GetActiveSubtrees(set<set<int>> &setActiveSTs) const
+{
+    //
+    for (map<pair<set<int>, set<int>>, double>::const_iterator it = distMapActivePair.begin(); it != distMapActivePair.end(); ++it)
+    {
+        setActiveSTs.insert(it->first.first);
+        setActiveSTs.insert(it->first.second);
+    }
 }
 
-void ConstrainedUPGMATreeBuilder::Dump() const {
-  cout << "List of coalescent pairs: \n";
-  for (map<pair<set<int>, set<int> >, double>::const_iterator it =
-           distMapActivePair.begin();
-       it != distMapActivePair.end(); ++it) {
-    cout << "[" << it->second << "] ";
-    DumpIntSet(it->first.first);
-    DumpIntSet(it->first.second);
-  }
+void ConstrainedUPGMATreeBuilder::Dump() const
+{
+    cout << "List of coalescent pairs: \n";
+    for (map<pair<set<int>, set<int>>, double>::const_iterator it = distMapActivePair.begin(); it != distMapActivePair.end(); ++it)
+    {
+        cout << "[" << it->second << "] ";
+        DumpIntSet(it->first.first);
+        DumpIntSet(it->first.second);
+    }
 }
 
 //***********************************************************************
 // tool for building near-optimal UPGMA tree
 
-ConstrainedNearUPGMATreesBuilder ::ConstrainedNearUPGMATreesBuilder(
-    PhyloDistance &distPairwiseTaxaIn,
-    const set<set<int> > &setClustersMustHaveIn,
-    const set<set<int> > &setClustersForbiddenIn, int numTotElemIn)
-    : distPairwiseTaxa(distPairwiseTaxaIn),
-      setClustersMustHave(setClustersMustHaveIn),
-      setClustersForbidden(setClustersForbiddenIn), numTotElem(numTotElemIn) {}
-
-void ConstrainedNearUPGMATreesBuilder ::Construct(int maxNumTrees,
-                                                  double thresMaxDistRatio) {
-  // thresMaxDistRatio: say 1.2, meaning consdiering 1.2*min distance to use as
-  // candidate
-  YW_ASSERT_INFO(thresMaxDistRatio >= 1.0,
-                 "Threshold: cannot be less than 1.0");
-
-  map<string, ConstrainedUPGMATreeBuilder *> listTreeBuilders;
-  // start with a single tree
-  ConstrainedUPGMATreeBuilder *pBuild0 = new ConstrainedUPGMATreeBuilder(
-      this->distPairwiseTaxa, this->setClustersMustHave,
-      this->setClustersForbidden, this->numTotElem);
-  string strDummy;
-  listTreeBuilders[strDummy] = pBuild0;
-
-  // start to build near-upgma trees
-  while (true) {
-    // process each builder
-    map<string, ConstrainedUPGMATreeBuilder *> listTreeBuildersNext;
-    bool fDone = false;
-    for (map<string, ConstrainedUPGMATreeBuilder *>::iterator it =
-             listTreeBuilders.begin();
-         it != listTreeBuilders.end(); ++it) {
-      ConstrainedUPGMATreeBuilder *pCurr = it->second;
-      // perform
-      if (pCurr->IsDone() == true) {
-        fDone = true;
-        break;
-      }
-      set<int> st1, st2;
-      double minDist = pCurr->GetMinCoalSubtrees(st1, st2);
-
-      // get near-min dist
-      double distUse = thresMaxDistRatio * minDist;
-      // set< pair<pair<set<int>, set<int> >, double> > setCandidates;
-      // listTreeBuilders[i]->GetCoalSubtreesHtBound(distUse, setCandidates );
-      map<pair<set<int>, set<int> >, double> setCandidates;
-      pCurr->GetMergeCandidates(setCandidates);
-
-      YW_ASSERT_INFO(setCandidates.size() > 0, "Fail to find candidates");
-
-      // process if room for more trees
-      for (map<pair<set<int>, set<int> >, double>::iterator it =
-               setCandidates.begin();
-           it != setCandidates.end(); ++it) {
-        if (it->second > distUse) {
-          continue;
-        }
-
-        // make sure it is not the mimimum one found before
-        if ((it->first.first != st1 || it->first.second != st2) &&
-            (it->first.first != st2 || it->first.second != st1))
+ConstrainedNearUPGMATreesBuilder ::ConstrainedNearUPGMATreesBuilder(PhyloDistance &distPairwiseTaxaIn, const set<set<int>> &setClustersMustHaveIn, const set<set<int>> &setClustersForbiddenIn, int numTotElemIn) : distPairwiseTaxa(distPairwiseTaxaIn), setClustersMustHave(setClustersMustHaveIn), setClustersForbidden(setClustersForbiddenIn), numTotElem(numTotElemIn)
+{
+}
 
+void ConstrainedNearUPGMATreesBuilder ::Construct(int maxNumTrees, double thresMaxDistRatio)
+{
+    // thresMaxDistRatio: e.g. 1.2 means merges with distance up to 1.2 * (minimum distance) are used as candidates
+    YW_ASSERT_INFO(thresMaxDistRatio >= 1.0, "Threshold: cannot be less than 1.0");
+
+    map<string, ConstrainedUPGMATreeBuilder *> listTreeBuilders;
+    // start with a single tree
+    ConstrainedUPGMATreeBuilder *pBuild0 = new ConstrainedUPGMATreeBuilder(this->distPairwiseTaxa, this->setClustersMustHave, this->setClustersForbidden, this->numTotElem);
+    string strDummy;
+    listTreeBuilders[strDummy] = pBuild0;
+
+    // start to build near-upgma trees
+    while (true)
+    {
+        // process each builder
+        map<string, ConstrainedUPGMATreeBuilder *> listTreeBuildersNext;
+        bool fDone = false;
+        for (map<string, ConstrainedUPGMATreeBuilder *>::iterator it = listTreeBuilders.begin(); it != listTreeBuilders.end(); ++it)
         {
-          if ((int)listTreeBuildersNext.size() < maxNumTrees) {
-            ConstrainedUPGMATreeBuilder *pBuildCopy =
-                new ConstrainedUPGMATreeBuilder(*pCurr);
-            pBuildCopy->MergeSubtrees(it->first.first, it->first.second,
-                                      it->second);
-            string strTreeCons = pBuildCopy->GetPartialConsTree();
-            if (listTreeBuildersNext.find(strTreeCons) ==
-                listTreeBuildersNext.end()) {
-              listTreeBuildersNext[strTreeCons] = pBuildCopy;
-              // cout << "Candidate merge: ";
-              // DumpIntSet(it->first.first);
-              // cout << "  ";
-              // DumpIntSet(it->first.second);
-            } else {
-              delete pBuildCopy;
+            ConstrainedUPGMATreeBuilder *pCurr = it->second;
+            // perform
+            if (pCurr->IsDone() == true)
+            {
+                fDone = true;
+                break;
             }
-          }
+            set<int> st1, st2;
+            double minDist = pCurr->GetMinCoalSubtrees(st1, st2);
+
+            // get near-min dist
+            double distUse = thresMaxDistRatio * minDist;
+            //set< pair<pair<set<int>, set<int> >, double> > setCandidates;
+            //listTreeBuilders[i]->GetCoalSubtreesHtBound(distUse, setCandidates );
+            map<pair<set<int>, set<int>>, double> setCandidates;
+            pCurr->GetMergeCandidates(setCandidates);
+
+            YW_ASSERT_INFO(setCandidates.size() > 0, "Fail to find candidates");
+
+            // process if room for more trees
+            for (map<pair<set<int>, set<int>>, double>::iterator it = setCandidates.begin(); it != setCandidates.end(); ++it)
+            {
+                if (it->second > distUse)
+                {
+                    continue;
+                }
+
+                // make sure it is not the minimum one found before
+                if ((it->first.first != st1 || it->first.second != st2) && (it->first.first != st2 || it->first.second != st1))
+
+                {
+                    if ((int)listTreeBuildersNext.size() < maxNumTrees)
+                    {
+                        ConstrainedUPGMATreeBuilder *pBuildCopy = new ConstrainedUPGMATreeBuilder(*pCurr);
+                        pBuildCopy->MergeSubtrees(it->first.first, it->first.second, it->second);
+                        string strTreeCons = pBuildCopy->GetPartialConsTree();
+                        if (listTreeBuildersNext.find(strTreeCons) == listTreeBuildersNext.end())
+                        {
+                            listTreeBuildersNext[strTreeCons] = pBuildCopy;
+                            //cout << "Candidate merge: ";
+                            //DumpIntSet(it->first.first);
+                            //cout << "  ";
+                            //DumpIntSet(it->first.second);
+                        }
+                        else
+                        {
+                            delete pBuildCopy;
+                        }
+                    }
+                }
+            }
+
+            // do the merge of the optimal one
+            pCurr->MergeSubtrees(st1, st2, minDist);
+            string strTreeCons2 = pCurr->GetPartialConsTree();
+            if (listTreeBuildersNext.find(strTreeCons2) == listTreeBuildersNext.end())
+            {
+                listTreeBuildersNext[strTreeCons2] = pCurr;
+                //cout << "Candidate (minimum) merge: ";
+                //DumpIntSet(st1);
+                //cout << "  ";
+                //DumpIntSet(st2);
+            }
+            else
+            {
+                delete pCurr;
+            }
+        }
+        if (fDone)
+        {
+            break;
         }
-      }
-
-      // do the merge of the optimal one
-      pCurr->MergeSubtrees(st1, st2, minDist);
-      string strTreeCons2 = pCurr->GetPartialConsTree();
-      if (listTreeBuildersNext.find(strTreeCons2) ==
-          listTreeBuildersNext.end()) {
-        listTreeBuildersNext[strTreeCons2] = pCurr;
-        // cout << "Candidate (minimum) merge: ";
-        // DumpIntSet(st1);
-        // cout << "  ";
-        // DumpIntSet(st2);
-      } else {
-        delete pCurr;
-      }
+
+        // add if there is no duplicate
+        listTreeBuilders = listTreeBuildersNext;
     }
-    if (fDone) {
-      break;
+
+    // collect trees
+    setTreeCons.clear();
+    for (map<string, ConstrainedUPGMATreeBuilder *>::iterator it = listTreeBuilders.begin(); it != listTreeBuilders.end(); ++it)
+    {
+        string treres = it->second->GetTree();
+        setTreeCons.insert(treres);
+        //cout << "Tree constructed: " << treres << endl;
     }
 
-    // add if there is no duplicate
-    listTreeBuilders = listTreeBuildersNext;
-  }
-
-  // collect trees
-  setTreeCons.clear();
-  for (map<string, ConstrainedUPGMATreeBuilder *>::iterator it =
-           listTreeBuilders.begin();
-       it != listTreeBuilders.end(); ++it) {
-    string treres = it->second->GetTree();
-    setTreeCons.insert(treres);
-    // cout << "Tree constructed: " << treres << endl;
-  }
-
-  // clean
-  for (map<string, ConstrainedUPGMATreeBuilder *>::iterator it =
-           listTreeBuilders.begin();
-       it != listTreeBuilders.end(); ++it) {
-    delete it->second;
-  }
-  listTreeBuilders.clear();
+    // clean
+    for (map<string, ConstrainedUPGMATreeBuilder *>::iterator it = listTreeBuilders.begin(); it != listTreeBuilders.end(); ++it)
+    {
+        delete it->second;
+    }
+    listTreeBuilders.clear();
 }
diff --git a/trisicell/external/scistree/TreeBuilder.h b/trisicell/external/scistree/TreeBuilder.h
index c68d95b..bc28410 100644
--- a/trisicell/external/scistree/TreeBuilder.h
+++ b/trisicell/external/scistree/TreeBuilder.h
@@ -12,8 +12,8 @@
 #include <iostream>
 #include <map>
 #include <set>
-#include <string>
 #include <vector>
+#include <string>
 using namespace std;
 
 //***********************************************************************
@@ -23,122 +23,98 @@ void TestNJ();
 // implement various methods to build a phylogenetic tree
 
 // define distances between taxa
-class PhyloDistance {
+class PhyloDistance
+{
 public:
-  void SetDistance(int node1, int node2, double dist);
-  double GetDistance(int node1, int node2) const;
-  void GetAllNodes(set<int> &nodesAll) const;
-  double GetDistanceNonNeg(int node1, int node2) const;
-  double CalcAveDistBtwClusters(const set<set<int> > &setClusters) const;
-  void Dump() const;
+    void SetDistance(int node1, int node2, double dist);
+    double GetDistance(int node1, int node2) const;
+    void GetAllNodes(set<int> &nodesAll) const;
+    double GetDistanceNonNeg(int node1, int node2) const;
+    double CalcAveDistBtwClusters(const set<set<int>> &setClusters) const;
+    void Dump() const;
 
 private:
-  map<pair<int, int>, double> mapDists;
+    map<pair<int, int>, double> mapDists;
 };
 
 // distance based tree builder
-class DistanceTreeBuilder {
+class DistanceTreeBuilder
+{
 public:
-  DistanceTreeBuilder(PhyloDistance &distPairwiseTaxaIn);
-  string NJ();
-  string ConstrainedUPGMA(const set<set<int> > &setClustersMustHave,
-                          const set<set<int> > &setClustersForbidden,
-                          map<set<int>, double> &mapSTHts, int numTotElem = -1);
-  string ConstrainedUPGMA(const set<set<int> > &setClustersMustHave,
-                          const set<set<int> > &setClustersDesired,
-                          int numTopCandidates,
-                          const set<set<int> > &setClustersForbidden,
-                          map<set<int>, double> &mapSTHts, int numTotElem);
-  void SetTaxonName(int id, const string &tname) { mapIndexToName[id] = tname; }
-  void SetOutgroup(int og) { taxonOutgroup = og; }
+    DistanceTreeBuilder(PhyloDistance &distPairwiseTaxaIn);
+    string NJ();
+    string ConstrainedUPGMA(const set<set<int>> &setClustersMustHave, const set<set<int>> &setClustersForbidden, map<set<int>, double> &mapSTHts, int numTotElem = -1);
+    string ConstrainedUPGMA(const set<set<int>> &setClustersMustHave, const set<set<int>> &setClustersDesired, int numTopCandidates, const set<set<int>> &setClustersForbidden, map<set<int>, double> &mapSTHts, int numTotElem);
+    void SetTaxonName(int id, const string &tname) { mapIndexToName[id] = tname; }
+    void SetOutgroup(int og) { taxonOutgroup = og; }
 
 private:
-  void NJFindNgbrs(int nodeIdNew, set<int> &nodesToSearch, int &ngbr1,
-                   int &ngbr2);
-  double NJCalcAveDist(int nodecur, const set<int> &nodesToSearch);
-  bool IsClusterIncompatible(const set<int> &clus1, const set<int> &clus2,
-                             int numTotElem = -1) const;
-  bool IsClusterIncompatibleWithSetofClus(const set<int> &clus1,
-                                          const set<set<int> > &setClus,
-                                          int numTotElem = -1) const;
-  void UpdateDistUPGMA(const pair<set<int>, set<int> > &pairClus,
-                       const map<set<int>, pair<string, double> > &mapSubtree,
-                       map<pair<set<int>, set<int> >, double> &distMapCur);
-  string GetTaxonNameFor(int index) const;
-  int GetNumCompatCladesIn(const set<int> &clus1,
-                           const set<set<int> > &setCladesTest,
-                           int numTotElem) const;
-
-  PhyloDistance &distPairwiseTaxa;
-  map<int, string> mapIndexToName;
-  int taxonOutgroup;
+    void NJFindNgbrs(int nodeIdNew, set<int> &nodesToSearch, int &ngbr1, int &ngbr2);
+    double NJCalcAveDist(int nodecur, const set<int> &nodesToSearch);
+    bool IsClusterIncompatible(const set<int> &clus1, const set<int> &clus2, int numTotElem = -1) const;
+    bool IsClusterIncompatibleWithSetofClus(const set<int> &clus1, const set<set<int>> &setClus, int numTotElem = -1) const;
+    void UpdateDistUPGMA(const pair<set<int>, set<int>> &pairClus, const map<set<int>, pair<string, double>> &mapSubtree, map<pair<set<int>, set<int>>, double> &distMapCur);
+    string GetTaxonNameFor(int index) const;
+    int GetNumCompatCladesIn(const set<int> &clus1, const set<set<int>> &setCladesTest, int numTotElem) const;
+
+    PhyloDistance &distPairwiseTaxa;
+    map<int, string> mapIndexToName;
+    int taxonOutgroup;
 };
 
 //***********************************************************************
 // tool for building UPGMA tree
 
-class ConstrainedUPGMATreeBuilder {
+class ConstrainedUPGMATreeBuilder
+{
 public:
-  ConstrainedUPGMATreeBuilder(PhyloDistance &distPairwiseTaxaIn,
-                              const set<set<int> > &setClustersMustHave,
-                              const set<set<int> > &setClustersForbidden,
-                              int numTotElemIn = -1);
-  ConstrainedUPGMATreeBuilder(const ConstrainedUPGMATreeBuilder &rhs);
-  string GetTree() const;
-  string GetPartialConsTree() const;
-  double GetMinCoalSubtrees(set<int> &st1, set<int> &st2) const;
-  void GetCoalSubtreesHtBound(
-      double htBound,
-      set<pair<pair<set<int>, set<int> >, double> > &setCandidates) const;
-  void MergeSubtrees(const set<int> &st1, const set<int> &st2,
-                     double htMergedST);
-  void GetMergeCandidates(
-      map<pair<set<int>, set<int> >, double> &setCandidates) const;
-  double GetCurDistForTwoClusters(const set<int> &clus1,
-                                  const set<int> &clus2) const;
-  void SetDistForTwoClusters(const set<int> &clus1, const set<int> &clus2,
-                             double dist);
-  int GetNumSubtrees() const;
-  void GetAllSubtrees(map<set<int>, string> &mapSTs) const;
-  void GetActiveSubtrees(set<set<int> > &setActiveSTs) const;
-  bool IsDone() const;
-  void Dump() const;
+    ConstrainedUPGMATreeBuilder(PhyloDistance &distPairwiseTaxaIn, const set<set<int>> &setClustersMustHave, const set<set<int>> &setClustersForbidden, int numTotElemIn = -1);
+    ConstrainedUPGMATreeBuilder(const ConstrainedUPGMATreeBuilder &rhs);
+    string GetTree() const;
+    string GetPartialConsTree() const;
+    double GetMinCoalSubtrees(set<int> &st1, set<int> &st2) const;
+    void GetCoalSubtreesHtBound(double htBound, set<pair<pair<set<int>, set<int>>, double>> &setCandidates) const;
+    void MergeSubtrees(const set<int> &st1, const set<int> &st2, double htMergedST);
+    void GetMergeCandidates(map<pair<set<int>, set<int>>, double> &setCandidates) const;
+    double GetCurDistForTwoClusters(const set<int> &clus1, const set<int> &clus2) const;
+    void SetDistForTwoClusters(const set<int> &clus1, const set<int> &clus2, double dist);
+    int GetNumSubtrees() const;
+    void GetAllSubtrees(map<set<int>, string> &mapSTs) const;
+    void GetActiveSubtrees(set<set<int>> &setActiveSTs) const;
+    bool IsDone() const;
+    void Dump() const;
 
 private:
-  void Init();
-  bool IsClusterIncompatible(const set<int> &clus1,
-                             const set<int> &clus2) const;
-  bool IsClusterIncompatibleWithSetofClus(const set<int> &clus1,
-                                          const set<set<int> > &setClus) const;
-  void UpdateDistUPGMA(const set<int> &st1, const set<int> &st2);
-
-  PhyloDistance &distPairwiseTaxa;
-  const set<set<int> > &setClustersMustHave;
-  const set<set<int> > &setClustersForbidden;
-  int numTotElem;
-  map<pair<set<int>, set<int> >, double> distMapActivePair;
-  map<set<int>, pair<string, double> > mapClusSubtree;
-  vector<pair<set<int>, set<int> > > histSTMerge;
+    void Init();
+    bool IsClusterIncompatible(const set<int> &clus1, const set<int> &clus2) const;
+    bool IsClusterIncompatibleWithSetofClus(const set<int> &clus1, const set<set<int>> &setClus) const;
+    void UpdateDistUPGMA(const set<int> &st1, const set<int> &st2);
+
+    PhyloDistance &distPairwiseTaxa;
+    const set<set<int>> &setClustersMustHave;
+    const set<set<int>> &setClustersForbidden;
+    int numTotElem;
+    map<pair<set<int>, set<int>>, double> distMapActivePair;
+    map<set<int>, pair<string, double>> mapClusSubtree;
+    vector<pair<set<int>, set<int>>> histSTMerge;
 };
 
 //***********************************************************************
 // tool for building near-optimal UPGMA tree
 
-class ConstrainedNearUPGMATreesBuilder {
+class ConstrainedNearUPGMATreesBuilder
+{
 public:
-  ConstrainedNearUPGMATreesBuilder(PhyloDistance &distPairwiseTaxaIn,
-                                   const set<set<int> > &setClustersMustHave,
-                                   const set<set<int> > &setClustersForbidden,
-                                   int numTotElem);
-  void Construct(int maxNumTrees, double thresMaxDistRatio);
-  void GetTrees(set<string> &setConsTrees) const { setConsTrees = setTreeCons; }
+    ConstrainedNearUPGMATreesBuilder(PhyloDistance &distPairwiseTaxaIn, const set<set<int>> &setClustersMustHave, const set<set<int>> &setClustersForbidden, int numTotElem);
+    void Construct(int maxNumTrees, double thresMaxDistRatio);
+    void GetTrees(set<string> &setConsTrees) const { setConsTrees = setTreeCons; }
 
 private:
-  PhyloDistance &distPairwiseTaxa;
-  const set<set<int> > &setClustersMustHave;
-  const set<set<int> > &setClustersForbidden;
-  int numTotElem;
-  set<string> setTreeCons;
+    PhyloDistance &distPairwiseTaxa;
+    const set<set<int>> &setClustersMustHave;
+    const set<set<int>> &setClustersForbidden;
+    int numTotElem;
+    set<string> setTreeCons;
 };
 
 #endif /* defined(____TreeBuilder__) */
diff --git a/trisicell/external/scistree/UnWeightedGraph.cpp b/trisicell/external/scistree/UnWeightedGraph.cpp
index 0d79eb6..f444aa2 100644
--- a/trisicell/external/scistree/UnWeightedGraph.cpp
+++ b/trisicell/external/scistree/UnWeightedGraph.cpp
@@ -7,194 +7,196 @@
 #include <stack>
 using namespace std;
 
-#if 0
-void DumpIntVec( const vector<int> &vec)
+void OutputQuotedString(ofstream &outFile, const char *buf)
 {
-	if(vec.size() == 0)
-	{
-		cout << "No items in the vector." << endl;
-		return;
-	}
-	for(int i=0; i<vec.size()-1; ++i)
-	{
-		cout << vec[i] << ", ";
-	}
-	cout << vec[ vec.size()-1] << endl;
-}
-
-void DumpIntArray(const int array[], int sz)
-{
-	for(int i=0; i<sz-1; ++i)
-	{
-		cout << array[i] << ", ";
-	}
-	cout << array[ sz-1] << endl;
-}
-#endif
-
-void OutputQuotedString(ofstream &outFile, const char *buf) {
-  outFile << '"';
-  outFile << buf;
-  outFile << '"';
+    outFile << '"';
+    outFile << buf;
+    outFile << '"';
 }
 
 // ***************************************************************************
 // Computing shortest path inside graph
 // ***************************************************************************
 
-int UnWeightedGraph ::GetAdjVert(int src, int lastAdj) {
-  // we want to find out adjacent nodes for node src
-  // so we simply check whether edge (src, i) is an edge of graph
-  // if so, return i
-  int res = NIL_VERTEX;
-  int start = lastAdj + 1;
-  if (start < 0) {
-    start = 0;
-  }
-  for (int i = start; i < (int)listVertices.size(); ++i) {
-    BGVertex *pv1 = &listVertices[src];
-    BGVertex *pv2 = &listVertices[i];
-    int j;
-    for (j = 0; j < (int)listEdges.size(); ++j) {
-      BGEdge &pe = listEdges[j];
-      if ((pe.pv1 == pv1 && pe.pv2 == pv2) ||
-          (pe.pv1 == pv2 && pe.pv2 == pv1)) {
-        res = i;
-        break;
-      }
+int UnWeightedGraph ::GetAdjVert(int src, int lastAdj)
+{
+    // we want to find out adjacent nodes for node src
+    // so we simply check whether edge (src, i) is an edge of graph
+    // if so, return i
+    int res = NIL_VERTEX;
+    int start = lastAdj + 1;
+    if (start < 0)
+    {
+        start = 0;
     }
-    if (j < (int)listEdges.size()) {
-      break;
+    for (int i = start; i < (int)listVertices.size(); ++i)
+    {
+        BGVertex *pv1 = &listVertices[src];
+        BGVertex *pv2 = &listVertices[i];
+        int j;
+        for (j = 0; j < (int)listEdges.size(); ++j)
+        {
+            BGEdge &pe = listEdges[j];
+            if ((pe.pv1 == pv1 && pe.pv2 == pv2) ||
+                (pe.pv1 == pv2 && pe.pv2 == pv1))
+            {
+                res = i;
+                break;
+            }
+        }
+        if (j < (int)listEdges.size())
+        {
+            break;
+        }
     }
-  }
 
-  return res;
+    return res;
 }
 
-bool UnWeightedGraph ::IsNeighour(int v1, int v2) {
-  BGVertex *pv1 = &listVertices[v1];
-  BGVertex *pv2 = &listVertices[v2];
-  int j;
-  for (j = 0; j < (int)listEdges.size(); ++j) {
-    BGEdge &pe = listEdges[j];
-    if ((pe.pv1 == pv1 && pe.pv2 == pv2) || (pe.pv1 == pv2 && pe.pv2 == pv1)) {
-      return true;
+bool UnWeightedGraph ::IsNeighour(int v1, int v2)
+{
+    BGVertex *pv1 = &listVertices[v1];
+    BGVertex *pv2 = &listVertices[v2];
+    int j;
+    for (j = 0; j < (int)listEdges.size(); ++j)
+    {
+        BGEdge &pe = listEdges[j];
+        if ((pe.pv1 == pv1 && pe.pv2 == pv2) ||
+            (pe.pv1 == pv2 && pe.pv2 == pv1))
+        {
+            return true;
+        }
     }
-  }
 
-  return false;
+    return false;
 }
 
-bool UnWeightedGraph ::IsConnected() {
-  // This function checks if the graph is connected or not
-  bool res = true;
-
-  // Initially, no vertex has been visited
-  int numVerts = listVertices.size();
-  bool *visited = new bool[numVerts];
-  for (int i = 0; i < numVerts; ++i) {
-    visited[i] = false;
-  }
-  visited[0] = true; // start from vertex 0, arbitarily
-  while (true) {
-    bool foundNew = false;
-    // we start from visited nodes to see if we can find new neighouring new
-    // nodes
-    for (int i = 0; i < numVerts; ++i) {
-      if (visited[i] == true) {
-        // Now get all i's neighour
-        int ngbi = -1;
-        while (true) {
-          ngbi = GetAdjVert(i, ngbi);
-          if (ngbi == NIL_VERTEX) {
-            break;
-          }
-          if (visited[ngbi] == false) {
-            visited[ngbi] = true;
-            foundNew = true;
-            break;
-          }
-        }
+bool UnWeightedGraph ::IsConnected()
+{
+    // This function checks if the graph is connected or not
+    bool res = true;
 
-        if (foundNew == true) {
-          break;
-        }
-      }
+    // Initially, no vertex has been visited
+    int numVerts = listVertices.size();
+    bool *visited = new bool[numVerts];
+    for (int i = 0; i < numVerts; ++i)
+    {
+        visited[i] = false;
     }
+    visited[0] = true; // start from vertex 0, arbitrarily
+    while (true)
+    {
+        bool foundNew = false;
+        // starting from the visited nodes, look for a neighbouring node that has not been visited yet
+        for (int i = 0; i < numVerts; ++i)
+        {
+            if (visited[i] == true)
+            {
+                // Now scan i's neighbours
+                int ngbi = -1;
+                while (true)
+                {
+                    ngbi = GetAdjVert(i, ngbi);
+                    if (ngbi == NIL_VERTEX)
+                    {
+                        break;
+                    }
+                    if (visited[ngbi] == false)
+                    {
+                        visited[ngbi] = true;
+                        foundNew = true;
+                        break;
+                    }
+                }
+
+                if (foundNew == true)
+                {
+                    break;
+                }
+            }
+        }
 
-    if (foundNew == false) {
-      break;
+        if (foundNew == false)
+        {
+            break;
+        }
     }
-  }
 
-  // Finally, we check if all vertices visited
-  for (int i = 0; i < numVerts; ++i) {
-    if (visited[i] == false) {
-      res = false;
-      break;
+    // Finally, we check if all vertices visited
+    for (int i = 0; i < numVerts; ++i)
+    {
+        if (visited[i] == false)
+        {
+            res = false;
+            break;
+        }
     }
-  }
 
-  return res;
+    return res;
 }
 
-void UnWeightedGraph ::OutputGML(const char *inFileName) {
-  // Now output a file in GML format
-  // First create a new name
-  string name = inFileName;
-  // cout << "num edges = " << listEdges.size() << endl;
-
-  DEBUG("FileName=");
-  DEBUG(name);
-  DEBUG("\n");
-  // Now open file to write out
-  ofstream outFile(name.c_str());
-
-  // First output some header info
-  outFile << "graph [\n";
-  outFile << "comment ";
-  OutputQuotedString(outFile, "Automatically generated by Graphing tool");
-  outFile << "\ndirected  0\n";
-  outFile << "id  1\n";
-  outFile << "label ";
-  OutputQuotedString(outFile, "To be more meaningful later....\n");
-
-  // Now output all the vertices
-  int i;
-  int numVerts = listVertices.size();
-  for (i = 0; i < numVerts; ++i) {
-    outFile << "node [\n";
-    char name[100];
-    name[0] = 'v';
-    sprintf(&name[1], "%d", i + 1);
-    outFile << "id " << i + 1 << endl;
+void UnWeightedGraph ::OutputGML(const char *inFileName)
+{
+    // Now output a file in GML format
+    // First create a new name
+    string name = inFileName;
+    //cout << "num edges = " << listEdges.size() << endl;
+
+    DEBUG("FileName=");
+    DEBUG(name);
+    DEBUG("\n");
+    // Now open file to write out
+    ofstream outFile(name.c_str());
+
+    // First output some header info
+    outFile << "graph [\n";
+    outFile << "comment ";
+    OutputQuotedString(outFile, "Automatically generated by Graphing tool");
+    outFile << "\ndirected  0\n";
+    outFile << "id  1\n";
     outFile << "label ";
-    OutputQuotedString(outFile, name);
-    outFile << endl;
-    outFile << "defaultAtrribute   1\n";
-    outFile << "]\n";
-  }
-
-  // Now output all the edges
-  for (i = 0; i < numVerts; ++i) {
-    for (int j = i + 1; j < numVerts; ++j) {
-      if (IsNeighour(i, j)) {
-
-        // cout << "Output an edge \n";
-        outFile << "edge [\n";
-        outFile << "source " << i + 1 << endl;
-        outFile << "target  " << j + 1 << endl;
+    OutputQuotedString(outFile, "To be more meaningful later....\n");
+
+    // Now output all the vertices
+    int i;
+    int numVerts = listVertices.size();
+    for (i = 0; i < numVerts; ++i)
+    {
+        outFile << "node [\n";
+        char name[100];
+        name[0] = 'v';
+        sprintf(&name[1], "%d", i + 1);
+        outFile << "id " << i + 1 << endl;
         outFile << "label ";
-        OutputQuotedString(outFile, "");
-        outFile << "\n";
+        OutputQuotedString(outFile, name);
+        outFile << endl;
+        outFile << "defaultAtrribute   1\n";
         outFile << "]\n";
-      }
     }
-  }
 
-  // Finally quite after closing file
-  outFile << "\n]\n";
-  outFile.close();
+    // Now output all the edges
+    for (i = 0; i < numVerts; ++i)
+    {
+        for (int j = i + 1; j < numVerts; ++j)
+        {
+            if (IsNeighour(i, j))
+            {
+
+                //cout << "Output an edge \n";
+                outFile << "edge [\n";
+                outFile << "source " << i + 1 << endl;
+                outFile << "target  " << j + 1 << endl;
+                outFile << "label ";
+                OutputQuotedString(outFile, "");
+                outFile << "\n";
+                outFile << "]\n";
+            }
+        }
+    }
+
+    // Finally, write the closing bracket and close the file
+    outFile << "\n]\n";
+    outFile.close();
 }
 
 // ***************************************************************************
@@ -205,746 +207,812 @@ void UnWeightedGraph ::OutputGML(const char *inFileName) {
 // Generic graph
 // ***************************************************************************
 
-GenericGraph::GenericGraph() {
-  nextId = 0; // id starting from 1
+GenericGraph::GenericGraph()
+{
+    nextId = 0; // id starting from 1
 }
 
-int GenericGraph ::AddVertex(int val) {
-  int res = nextId;
-  GraphVertex v(nextId, val);
+int GenericGraph ::AddVertex(int val)
+{
+    int res = nextId;
+    GraphVertex v(nextId, val);
 
-  vertices.insert(map<int, GraphVertex>::value_type(nextId, v));
+    vertices.insert(map<int, GraphVertex>::value_type(nextId, v));
 
-  nextId++; // never reuse id
+    nextId++; // never reuse id
 
-  return res;
+    return res;
 }
 
-bool GenericGraph ::RemoveVertex(int id) {
-  map<int, GraphVertex>::iterator it = vertices.find(id);
-  if (it == vertices.end()) {
-    return false;
-  }
-  if (it != vertices.end()) {
-    vertices.erase(it);
-  }
-  // now also remove its entry in the edge list
-  if (adjacencyList.find(id) != adjacencyList.end()) {
-    adjacencyList.erase(id);
-  }
-  // also should remove every adj list when this id appears
-  for (map<int, EDGE_LIST>::const_iterator it2 = adjacencyList.begin();
-       it2 != adjacencyList.end(); ++it2) {
-    vector<GraphEdge> edgeListNew;
-    for (int i = 0; i < (int)it2->second.size(); ++i) {
-      GraphEdge e = it2->second[i];
-      int v1, v2;
-      e.GetVertexIDs(v1, v2);
-      if (v2 != id) {
-        // keep it
-        edgeListNew.push_back(e);
-      }
+bool GenericGraph ::RemoveVertex(int id)
+{
+    map<int, GraphVertex>::iterator it = vertices.find(id);
+    if (it == vertices.end())
+    {
+        return false;
+    }
+    if (it != vertices.end())
+    {
+        vertices.erase(it);
+    }
+    // now also remove its entry in the edge list
+    if (adjacencyList.find(id) != adjacencyList.end())
+    {
+        adjacencyList.erase(id);
     }
-    // set the new list
-    adjacencyList[it2->first] = edgeListNew;
-  }
-  return true;
+    // also should remove every adj list when this id appears
+    for (map<int, EDGE_LIST>::const_iterator it2 = adjacencyList.begin(); it2 != adjacencyList.end(); ++it2)
+    {
+        vector<GraphEdge> edgeListNew;
+        for (int i = 0; i < (int)it2->second.size(); ++i)
+        {
+            GraphEdge e = it2->second[i];
+            int v1, v2;
+            e.GetVertexIDs(v1, v2);
+            if (v2 != id)
+            {
+                // keep it
+                edgeListNew.push_back(e);
+            }
+        }
+        // set the new list
+        adjacencyList[it2->first] = edgeListNew;
+    }
+    return true;
 }
 
-int GenericGraph ::GetNumEdges() const {
-  int numEdges = 0;
+int GenericGraph ::GetNumEdges() const
+{
+    int numEdges = 0;
 
-  for (map<int, EDGE_LIST>::const_iterator it = adjacencyList.begin();
-       it != adjacencyList.end(); ++it) {
-    numEdges += it->second.size();
-  }
+    for (map<int, EDGE_LIST>::const_iterator it = adjacencyList.begin(); it != adjacencyList.end(); ++it)
+    {
+        numEdges += it->second.size();
+    }
 
-  return numEdges;
+    return numEdges;
 }
 
-int GenericGraph ::GetEdgeNum(int vid) {
-  if (vertices.find(vid) == vertices.end() ||
-      adjacencyList.find(vid) == adjacencyList.end()) {
-    // cout << "No such vertex or not in adjacency list." << endl;
-    return 0;
-  }
-  return adjacencyList[vid].size();
-  //    curpos = 0;
-  //    if( adjacencyList[vid].size() == 0 )
-  //    {
-  // cout << "Nothing in the adjacency list.\n";
-  //        return false;
-  //    }
-  // cout << "Ok, we found one edge." << endl;
-  //    e =  adjacencyList[vid][0];
-  //    return true;
+int GenericGraph ::GetEdgeNum(int vid)
+{
+    if (vertices.find(vid) == vertices.end() || adjacencyList.find(vid) == adjacencyList.end())
+    {
+        //cout << "No such vertex or not in adjacency list." << endl;
+        return 0;
+    }
+    return adjacencyList[vid].size();
+    //    curpos = 0;
+    //    if( adjacencyList[vid].size() == 0 )
+    //    {
+    //cout << "Nothing in the adjacency list.\n";
+    //        return false;
+    //    }
+    //cout << "Ok, we found one edge." << endl;
+    //    e =  adjacencyList[vid][0];
+    //    return true;
 }
 
-GraphEdge *GenericGraph ::GetEdgeByIndex(int vid, int index) {
-  if (vertices.find(vid) == vertices.end() ||
-      adjacencyList.find(vid) == adjacencyList.end()) {
-    return NULL;
-  }
-  //    curpos ++;
-  if ((int)adjacencyList[vid].size() <= index) {
-    return NULL;
-  }
-  return &adjacencyList[vid][index];
-  //    return true;
+GraphEdge *GenericGraph ::GetEdgeByIndex(int vid, int index)
+{
+    if (vertices.find(vid) == vertices.end() || adjacencyList.find(vid) == adjacencyList.end())
+    {
+        return NULL;
+    }
+    //    curpos ++;
+    if ((int)adjacencyList[vid].size() <= index)
+    {
+        return NULL;
+    }
+    return &adjacencyList[vid][index];
+    //    return true;
 }
 
-GraphEdge *GenericGraph ::GetEdge(int vid, int uid) {
-  if (vertices.find(vid) == vertices.end() ||
-      adjacencyList.find(vid) == adjacencyList.end()) {
-    // cout << "Bad vertex: vid = " << vid << endl;
-    return NULL;
-  }
-  for (int i = 0; i < (int)adjacencyList[vid].size(); ++i) {
-    int v1, v2;
-    adjacencyList[vid][i].GetVertexIDs(v1, v2);
-    if (v2 == uid) {
-      return &adjacencyList[vid][i];
+GraphEdge *GenericGraph ::GetEdge(int vid, int uid)
+{
+    if (vertices.find(vid) == vertices.end() || adjacencyList.find(vid) == adjacencyList.end())
+    {
+        //cout << "Bad vertex: vid = " << vid << endl;
+        return NULL;
+    }
+    for (int i = 0; i < (int)adjacencyList[vid].size(); ++i)
+    {
+        int v1, v2;
+        adjacencyList[vid][i].GetVertexIDs(v1, v2);
+        if (v2 == uid)
+        {
+            return &adjacencyList[vid][i];
+        }
     }
-  }
-  return NULL;
+    return NULL;
 }
 
-bool GenericGraph ::IsEdge(int vid, int uid) {
-  // Check to see if (vid, uid) is an edge or not
-  GraphEdge *pe = GetEdge(vid, uid);
-  return (pe != NULL);
+bool GenericGraph ::IsEdge(int vid, int uid)
+{
+    // Check to see if (vid, uid) is an edge or not
+    GraphEdge *pe = GetEdge(vid, uid);
+    return (pe != NULL);
 }
 
-bool GenericGraph ::FindVertexByID(int id, GraphVertex &v) {
-  // Find by id
-  if (vertices.find(id) == vertices.end()) {
-    return false;
-  } else {
-    v = vertices[id];
-    return true;
-  }
+bool GenericGraph ::FindVertexByID(int id, GraphVertex &v)
+{
+    // Find by id
+    if (vertices.find(id) == vertices.end())
+    {
+        return false;
+    }
+    else
+    {
+        v = vertices[id];
+        return true;
+    }
 }
 
-GraphVertex *GenericGraph ::FindVertex(int id) {
-  // for(map<int,GraphVertex>::iterator it = vertices.begin(); it !=
-  // vertices.end(); ++it)
-  //{
-  //    cout << "FindVertex : Vertex id = " << it->first << endl;
-  //}
-  // cout << "qunery id = " << id << endl;
-  if (vertices.find(id) == vertices.end()) {
-    // cout << "no, can not find it.\n";
-    return NULL;
-  } else {
-    // cout << "find it: vertex = " << vertices[id].GetID() << endl;
-    return &vertices[id];
-  }
+GraphVertex *GenericGraph ::FindVertex(int id)
+{
+    //for(map<int,GraphVertex>::iterator it = vertices.begin(); it != vertices.end(); ++it)
+    //{
+    //    cout << "FindVertex : Vertex id = " << it->first << endl;
+    //}
+    //cout << "qunery id = " << id << endl;
+    if (vertices.find(id) == vertices.end())
+    {
+        //cout << "no, can not find it.\n";
+        return NULL;
+    }
+    else
+    {
+        //cout << "find it: vertex = " << vertices[id].GetID() << endl;
+        return &vertices[id];
+    }
 }
 
-void GenericGraph ::SetVertexVisited(int vid, bool flag) {
-  YW_ASSERT(vertices.find(vid) != vertices.end());
-  vertices[vid].SetVisited(flag);
+void GenericGraph ::SetVertexVisited(int vid, bool flag)
+{
+    YW_ASSERT(vertices.find(vid) != vertices.end());
+    vertices[vid].SetVisited(flag);
 }
 
-bool GenericGraph ::IsVertexVisited(int vid) {
-  GraphVertex v;
-  YW_ASSERT(FindVertexByID(vid, v) == true);
+bool GenericGraph ::IsVertexVisited(int vid)
+{
+    GraphVertex v;
+    YW_ASSERT(FindVertexByID(vid, v) == true);
 
-  return v.IsVisited();
+    return v.IsVisited();
 }
 
-void GenericGraph ::SetVertexLabel(int vid, string lbl) {
-  GraphVertex *pv = FindVertex(vid);
-  YW_ASSERT_INFO(pv != NULL, "SetVertexLabel : Bad query");
-  pv->SetLabel(lbl);
+void GenericGraph ::SetVertexLabel(int vid, string lbl)
+{
+    GraphVertex *pv = FindVertex(vid);
+    YW_ASSERT_INFO(pv != NULL, "SetVertexLabel : Bad query");
+    pv->SetLabel(lbl);
 }
 
-GraphVertex *GenericGraph ::GetVertexByLabel(string lbl) {
-  for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    if (it->second.GetLabel() == lbl) {
-      return &vertices[it->first];
+GraphVertex *GenericGraph ::GetVertexByLabel(string lbl)
+{
+    for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        if (it->second.GetLabel() == lbl)
+        {
+            return &vertices[it->first];
+        }
     }
-  }
-  return NULL;
+    return NULL;
 }
 
-void GenericGraph ::SetEdgeLabel(int vid1, int vid2, string lbl) {
-  // cout << "vid1 = " << vid1 << ", vid2 = " << vid2  << "lbl = " << lbl <<
-  // endl;
-  GraphEdge *pe = GetEdge(vid1, vid2);
-  YW_ASSERT_INFO(pe != NULL, "SetEdgeLabel :: Bad query");
-  pe->SetLabel(lbl);
+void GenericGraph ::SetEdgeLabel(int vid1, int vid2, string lbl)
+{
+    //cout << "vid1 = " << vid1 << ", vid2 = " << vid2  << "lbl = " << lbl << endl;
+    GraphEdge *pe = GetEdge(vid1, vid2);
+    YW_ASSERT_INFO(pe != NULL, "SetEdgeLabel :: Bad query");
+    pe->SetLabel(lbl);
 }
 
 // ***************************************************************************
 // undirected graph
 // ***************************************************************************
 
-UndirectedGraph ::UndirectedGraph() { prevArray = NULL; }
-
-UndirectedGraph ::~UndirectedGraph() {
-  if (prevArray != NULL) {
-    delete prevArray;
+UndirectedGraph ::UndirectedGraph()
+{
     prevArray = NULL;
-  }
 }
 
-bool UndirectedGraph ::AddEdge(int vid1, int vid2, int val) {
-  // We need to add an edge to the adjacent list to both vertices
-  // first make sure the graph is valid
-  if (vertices.find(vid1) == vertices.end() ||
-      vertices.find(vid2) == vertices.end()) {
-    return false;
-  }
-
-  // Then we add it to the adjaceny list
-  GraphEdge e(vid1, vid2, val);
-  if (adjacencyList.find(vid1) == adjacencyList.end()) {
-    EDGE_LIST el;
-    adjacencyList.insert(map<int, EDGE_LIST>::value_type(vid1, el));
-  }
-  adjacencyList[vid1].push_back(e);
-  if (adjacencyList.find(vid2) == adjacencyList.end()) {
-    EDGE_LIST el;
-    adjacencyList.insert(map<int, EDGE_LIST>::value_type(vid2, el));
-  }
-  adjacencyList[vid2].push_back(e);
-  return true;
+UndirectedGraph ::~UndirectedGraph()
+{
+    if (prevArray != NULL)
+    {
+        delete prevArray;
+        prevArray = NULL;
+    }
+}
+
+bool UndirectedGraph ::AddEdge(int vid1, int vid2, int val)
+{
+    // We need to add the edge to the adjacency lists of both vertices
+    // first make sure the graph is valid
+    if (vertices.find(vid1) == vertices.end() || vertices.find(vid2) == vertices.end())
+    {
+        return false;
+    }
+
+    // Then we add it to the adjacency list
+    GraphEdge e(vid1, vid2, val);
+    if (adjacencyList.find(vid1) == adjacencyList.end())
+    {
+        EDGE_LIST el;
+        adjacencyList.insert(map<int, EDGE_LIST>::value_type(vid1, el));
+    }
+    adjacencyList[vid1].push_back(e);
+    if (adjacencyList.find(vid2) == adjacencyList.end())
+    {
+        EDGE_LIST el;
+        adjacencyList.insert(map<int, EDGE_LIST>::value_type(vid2, el));
+    }
+    adjacencyList[vid2].push_back(e);
+    return true;
 }
 
-int UndirectedGraph ::GetNumEdges() const {
-  return GenericGraph::GetNumEdges() /
-         2; // in undirected graph, we counted twice here
-  //    int numEdges = 0;
+int UndirectedGraph ::GetNumEdges() const
+{
+    return GenericGraph::GetNumEdges() / 2; // in undirected graph, we counted twice here
+    //    int numEdges = 0;
 
-  //    for( map<int, EDGE_LIST> :: const_iterator it = adjacencyList.begin();
-  //    it != adjacencyList.end(); ++it)
-  //    {
-  //        numEdges += it->second.size();
-  //    }
+    //    for( map<int, EDGE_LIST> :: const_iterator it = adjacencyList.begin(); it != adjacencyList.end(); ++it)
+    //    {
+    //        numEdges += it->second.size();
+    //    }
 
-  //    return numEdges/2;      // in undirected graph, we counted twice here
+    //    return numEdges/2;      // in undirected graph, we counted twice here
 }
 
-void UndirectedGraph ::InitTraversal() {
-  for (map<int, GraphVertex>::iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    it->second.SetVisited(false);
-  }
+void UndirectedGraph ::InitTraversal()
+{
+    for (map<int, GraphVertex>::iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        it->second.SetVisited(false);
+    }
 }
 
-GraphEdge *UndirectedGraph ::GetEdge(int vid, int uid) {
-  if (vertices.find(vid) == vertices.end() ||
-      adjacencyList.find(vid) == adjacencyList.end()) {
-    // cout << "Bad vertex: vid = " << vid << endl;
-    return NULL;
-  }
-  for (int i = 0; i < (int)adjacencyList[vid].size(); ++i) {
-    int v1, v2;
-    adjacencyList[vid][i].GetVertexIDs(v1, v2);
-    if (v1 == uid || v2 == uid) {
-      return &adjacencyList[vid][i];
+GraphEdge *UndirectedGraph ::GetEdge(int vid, int uid)
+{
+    if (vertices.find(vid) == vertices.end() || adjacencyList.find(vid) == adjacencyList.end())
+    {
+        //cout << "Bad vertex: vid = " << vid << endl;
+        return NULL;
     }
-  }
-  // cout << "Edge not in adjuacency list\n";
-  return NULL;
+    for (int i = 0; i < (int)adjacencyList[vid].size(); ++i)
+    {
+        int v1, v2;
+        adjacencyList[vid][i].GetVertexIDs(v1, v2);
+        if (v1 == uid || v2 == uid)
+        {
+            return &adjacencyList[vid][i];
+        }
+    }
+    //cout << "Edge not in adjuacency list\n";
+    return NULL;
 }
 
-int UndirectedGraph ::GetFirstNode(GraphVertex &v) {
-  // return -1 when done
-  itCurrent = vertices.begin();
-  if (itCurrent == vertices.end()) {
-    // No vertices
-    return -1;
-  } else {
-    v = (*itCurrent).second;
-    return (*itCurrent).first;
-  }
+int UndirectedGraph ::GetFirstNode(GraphVertex &v)
+{
+    // return -1 when done
+    itCurrent = vertices.begin();
+    if (itCurrent == vertices.end())
+    {
+        // No vertices
+        return -1;
+    }
+    else
+    {
+        v = (*itCurrent).second;
+        return (*itCurrent).first;
+    }
 }
 
-int UndirectedGraph ::GetNextNode(GraphVertex &v) {
-  // return id of the node
-  ++itCurrent;
-  if (itCurrent == vertices.end()) {
-    // No vertices
-    return -1;
-  } else {
-    v = (*itCurrent).second;
-    return (*itCurrent).first;
-  }
+int UndirectedGraph ::GetNextNode(GraphVertex &v)
+{
+    // return id of the node
+    ++itCurrent;
+    if (itCurrent == vertices.end())
+    {
+        // No vertices
+        return -1;
+    }
+    else
+    {
+        v = (*itCurrent).second;
+        return (*itCurrent).first;
+    }
 }
 
-int UndirectedGraph ::TraversalFrom(int id, set<int> &listOfCCVertices) {
-  // Return value = number of nodes visited, and store the found cc vertices
-  // into the set First mark the curernt node as visisted
-  GraphVertex v;
-  if (FindVertexByID(id, v) == false) {
-    return 0;
-  }
-  if (v.IsVisited() == true) {
-    // No need to continue if already visited
-    return 0;
-  }
-  v.SetVisited(true);
-  listOfCCVertices.insert(id);
-
-  // Now recurse into others
-  //    GRAPH_TRAV_POSITION pos;
-
-  // This function traverse graph from node'id = id
-
-  //    if( GetFirstEdge( id, edge ) == false)
-  if (GetEdgeNum(id) == 0) {
-    return 1; // only visited this node
-  }
-
-  int nRes = 0;
-  for (int evIndex = 0; evIndex < GetEdgeNum(id); ++evIndex) {
-    GraphEdge *pedge = GetEdgeByIndex(id, evIndex);
-    YW_ASSERT(pedge != NULL);
-
-    // Now move on to that edge's other node
-    int id1, id2, idToUse;
-    pedge->GetVertexIDs(id1, id2);
-    YW_ASSERT(id1 != id2);
-    if (id1 == id) {
-      idToUse = id2;
-    } else {
-      idToUse = id1;
+int UndirectedGraph ::TraversalFrom(int id, set<int> &listOfCCVertices)
+{
+    // Return value = number of nodes visited, and store the found cc vertices into the set
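+    // First mark the current node as visited (NOTE(review): v is a local copy filled by FindVertexByID, so the flag is not stored back in vertices)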
+    GraphVertex v;
+    if (FindVertexByID(id, v) == false)
+    {
+        return 0;
+    }
+    if (v.IsVisited() == true)
+    {
+        // No need to continue if already visited
+        return 0;
+    }
+    v.SetVisited(true);
+    listOfCCVertices.insert(id);
+
+    // Now recurse into others
+    //    GRAPH_TRAV_POSITION pos;
+
+    // This function traverses the graph starting from the node whose id = id
+
+    //    if( GetFirstEdge( id, edge ) == false)
+    if (GetEdgeNum(id) == 0)
+    {
+        return 1; // only visited this node
     }
-    nRes += TraversalFrom(idToUse, listOfCCVertices);
 
-    // Now move to the next
-    //        if( GetNextEdge( id, edge ) == false)
-    //        {
-    //           break;
-    //        }
-  }
+    int nRes = 0;
+    for (int evIndex = 0; evIndex < GetEdgeNum(id); ++evIndex)
+    {
+        GraphEdge *pedge = GetEdgeByIndex(id, evIndex);
+        YW_ASSERT(pedge != NULL);
+
+        // Now move on to that edge's other node
+        int id1, id2, idToUse;
+        pedge->GetVertexIDs(id1, id2);
+        YW_ASSERT(id1 != id2);
+        if (id1 == id)
+        {
+            idToUse = id2;
+        }
+        else
+        {
+            idToUse = id1;
+        }
+        nRes += TraversalFrom(idToUse, listOfCCVertices);
 
-  return nRes;
+        // Now move to the next
+        //        if( GetNextEdge( id, edge ) == false)
+        //        {
+        //           break;
+        //        }
+    }
+
+    return nRes;
 }
 
-int UndirectedGraph ::FindUnvisitedNode() {
-  // return the id of an unvisited node, if none return -1
-  for (map<int, GraphVertex>::iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    if (it->second.IsVisited() == false) {
-      return it->second.GetID();
+int UndirectedGraph ::FindUnvisitedNode()
+{
+    // return the id of an unvisited node, if none return -1
+    for (map<int, GraphVertex>::iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        if (it->second.IsVisited() == false)
+        {
+            return it->second.GetID();
+        }
     }
-  }
-  return -1;
+    return -1;
 }
 
-void UndirectedGraph ::InitPrevConfig() {
-  // if( prevArray != NULL)
-  //{
-  //  delete [] prevArray;
-  //}
-  prevMap.clear();
-  nextMap.clear();
+void UndirectedGraph ::InitPrevConfig()
+{
+    //if( prevArray != NULL)
+    //{
+    //  delete [] prevArray;
+    //}
+    prevMap.clear();
+    nextMap.clear();
 }
 
-void UndirectedGraph ::DFSSetPrevNode(int u, int uprev) {
-  // cout << "Set u=" << u << " prev is " << uprev << endl;
-  map<int, int>::iterator it = prevMap.find(u);
-  if (it != prevMap.end()) {
-    prevMap.erase(it);
-  }
-  // We need to make sure we can remove the record first before insertion
-  prevMap.insert(map<int, int>::value_type(u, uprev));
+void UndirectedGraph ::DFSSetPrevNode(int u, int uprev)
+{
+    //cout << "Set u=" << u << " prev is " << uprev << endl;
+    map<int, int>::iterator it = prevMap.find(u);
+    if (it != prevMap.end())
+    {
+        prevMap.erase(it);
+    }
+    // We need to make sure we can remove the record first before insertion
+    prevMap.insert(map<int, int>::value_type(u, uprev));
 }
 
-int UndirectedGraph ::DFSGetPrevNode(int u) {
-  if (prevMap.find(u) == prevMap.end()) {
-    return -1; // did not find anything
-  }
+int UndirectedGraph ::DFSGetPrevNode(int u)
+{
+    if (prevMap.find(u) == prevMap.end())
+    {
+        return -1; // did not find anything
+    }
 
-  // Then we get the prev
-  return prevMap[u];
+    // Then we get the prev
+    return prevMap[u];
 }
 
-void UndirectedGraph ::DFSSetNextNode(int u, int unext) {
-  // cout << "Set u=" << u << " next to " << unext << endl;
-  map<int, int>::iterator it = nextMap.find(u);
-  if (it != nextMap.end()) {
-    nextMap.erase(it);
-  }
-  nextMap.insert(map<int, int>::value_type(u, unext));
+void UndirectedGraph ::DFSSetNextNode(int u, int unext)
+{
+    //cout << "Set u=" << u << " next to " << unext << endl;
+    map<int, int>::iterator it = nextMap.find(u);
+    if (it != nextMap.end())
+    {
+        nextMap.erase(it);
+    }
+    nextMap.insert(map<int, int>::value_type(u, unext));
 }
 
-int UndirectedGraph ::DFSGetNextNode(int u) {
-  if (nextMap.find(u) == nextMap.end()) {
-    return -1; // did not find anything
-  }
+int UndirectedGraph ::DFSGetNextNode(int u)
+{
+    if (nextMap.find(u) == nextMap.end())
+    {
+        return -1; // did not find anything
+    }
 
-  // Then we get the prev
-  return nextMap[u];
+    // Then we get the prev
+    return nextMap[u];
 }
 
-bool UndirectedGraph ::IsBipartitie() {
-  // CAUTION: only work for id = 0, 1, 2, 3, ... Do not support node deletion
-  // yet. TBD
-  const int WHITE = -1;
-  const int GRAY = 1;
-  // const int BLACK = 2;
+bool UndirectedGraph ::IsBipartitie()
+{
+    // CAUTION: only works for ids 0, 1, 2, 3, ... Node deletion is not supported yet. TBD
+    const int WHITE = -1;
+    const int GRAY = 1;
+    //const int BLACK = 2;
 
-  if (GetNumVertices() == 0) {
-    // No nodes
-    return true;
-  }
-
-  // Test if this graph is bipartitie
-  int *partition = new int[GetNumVertices()];
-  int *color = new int[GetNumVertices()];
-  for (int i = 0; i < GetNumVertices(); ++i) {
-    partition[i] = 0;
-    color[i] = WHITE;
-  }
-
-  int n = GetNumVertices();
-
-  // Now start form node #1, whose id = 0. CAUTION: it is OK here since there is
-  // no deletion if later on we may delete some nodes, we can not assume it
-  // anymore
-  //    partition[0] = 1;
-  //    color[0] = GRAY;
-
-  for (int v = 0; v < n; v++) // start at first vertex
-  {
-    if (color[v] != WHITE)
-      continue;
-
-    color[v] = GRAY;
-    queue<int> toGrow; // use BFS queue search
-    toGrow.push(v);
-
-    while (!toGrow.empty()) {
-      int grow = toGrow.front();
-      toGrow.pop();
-      // cout << "pop " << grow << endl;
-      // Find neighour of this node
-      //        bool flag = GetFirstEdge(grow, ev);
-
-      for (int evIndex = 0; evIndex < GetEdgeNum(grow); ++evIndex) {
-        GraphEdge *pev = GetEdgeByIndex(grow, evIndex);
-        YW_ASSERT(pev != NULL);
-        int id1, id2;
-        pev->GetVertexIDs(id1, id2);
-        int u = id1;
-        if (u == grow) {
-          u = id2;
-        }
+    if (GetNumVertices() == 0)
+    {
+        // No nodes
+        return true;
+    }
 
-        if (color[u] == WHITE) // not colored yet
-        {
-          color[u] = 3 - color[grow]; // set to other color
-          toGrow.push(u);
-          // cout << "push " << u << endl;
-        } else // check for different color
+    // Test if this graph is bipartite
+    int *partition = new int[GetNumVertices()];
+    int *color = new int[GetNumVertices()];
+    for (int i = 0; i < GetNumVertices(); ++i)
+    {
+        partition[i] = 0;
+        color[i] = WHITE;
+    }
+
+    int n = GetNumVertices();
+
+    // Now start from node #1, whose id = 0. CAUTION: this is OK here since there is no deletion;
+    // if nodes may be deleted later on, we can no longer assume this
+    //    partition[0] = 1;
+    //    color[0] = GRAY;
+
+    for (int v = 0; v < n; v++) // start at first vertex
+    {
+        if (color[v] != WHITE)
+            continue;
+
+        color[v] = GRAY;
+        queue<int> toGrow; // use BFS queue search
+        toGrow.push(v);
+
+        while (!toGrow.empty())
         {
-          if (color[u] == color[grow]) {
-            // cout << "u=" << u << ", grow =" << grow << " are same color\n";
-            delete[] partition;
-            delete[] color;
-            return false;
-          }
-        }
-        // Now try to move to the next
-        //          flag = GetNextEdge( grow, ev );
-      }
+            int grow = toGrow.front();
+            toGrow.pop();
+            //cout << "pop " << grow << endl;
+            // Find the neighbours of this node
+            //        bool flag = GetFirstEdge(grow, ev);
+
+            for (int evIndex = 0; evIndex < GetEdgeNum(grow); ++evIndex)
+            {
+                GraphEdge *pev = GetEdgeByIndex(grow, evIndex);
+                YW_ASSERT(pev != NULL);
+                int id1, id2;
+                pev->GetVertexIDs(id1, id2);
+                int u = id1;
+                if (u == grow)
+                {
+                    u = id2;
+                }
+
+                if (color[u] == WHITE) // not colored yet
+                {
+                    color[u] = 3 - color[grow]; // set to other color
+                    toGrow.push(u);
+                    //cout << "push " << u << endl;
+                }
+                else // check for different color
+                {
+                    if (color[u] == color[grow])
+                    {
+                        //cout << "u=" << u << ", grow =" << grow << " are same color\n";
+                        delete[] partition;
+                        delete[] color;
+                        return false;
+                    }
+                }
+                // Now try to move to the next
+                //          flag = GetNextEdge( grow, ev );
+            }
 
-    } // more nodes in this component
-  }   // while all components have been checked
-      // cout << "here\n";
+        } // more nodes in this component
+    }     // while all components have been checked
+          //cout << "here\n";
 
-  delete[] partition;
-  delete[] color;
-  return true;
+    delete[] partition;
+    delete[] color;
+    return true;
 }
 
-void UndirectedGraph ::FindComponents(set<set<int> > &comps) {
-  // merging list of elements
-  comps.clear();
-  // init with each node as itself
-  // cout << "vertices size = " << vertices.size() << endl;
-  for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    // cout << "add one singleton" << it->first << endl;
-    set<int> single;
-    single.insert(it->first);
-    comps.insert(single);
-  }
-  // cout << "Comp initial size = " << comps.size() << endl;
-  // see if we can merge two sets if any edges are connected (well not very
-  // efficient...)
-  bool fCont = true;
-  while (fCont == true) {
-    // cout << "Inside while loop: Components = \n";
-    // for( set< set<int> > :: iterator it = comps.begin(); it != comps.end();
-    // ++it)
-    //{
-    // DumpIntSet( *it );
-    //}
+void UndirectedGraph ::FindComponents(set<set<int>> &comps)
+{
+    // merging list of elements
+    comps.clear();
+    // init with each node as itself
+    //cout << "vertices size = " << vertices.size() << endl;
+    for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        //cout << "add one singleton" << it->first << endl;
+        set<int> single;
+        single.insert(it->first);
+        comps.insert(single);
+    }
+    //cout << "Comp initial size = " << comps.size() << endl;
+    // see if we can merge two sets if any edges are connected (well not very efficient...)
+    bool fCont = true;
+    while (fCont == true)
+    {
+        //cout << "Inside while loop: Components = \n";
+        //for( set< set<int> > :: iterator it = comps.begin(); it != comps.end(); ++it)
+        //{
+        //DumpIntSet( *it );
+        //}
+
+        fCont = false;
+        for (set<set<int>>::iterator it = comps.begin(); it != comps.end(); ++it)
+        {
+            //
+            set<set<int>>::iterator it2 = it;
+            it2++;
+            for (; it2 != comps.end(); ++it2)
+            {
 
-    fCont = false;
-    for (set<set<int> >::iterator it = comps.begin(); it != comps.end(); ++it) {
-      //
-      set<set<int> >::iterator it2 = it;
-      it2++;
-      for (; it2 != comps.end(); ++it2) {
-
-        // see if these two should be merged
-        for (set<int>::iterator itt1 = (*it).begin(); itt1 != (*it).end();
-             ++itt1) {
-          for (set<int>::iterator itt2 = (*it2).begin(); itt2 != (*it2).end();
-               ++itt2) {
-            //	cout << "itt1 = " << *itt1 << ", itt2 = " << *itt2 << endl;
-            // is these two connected?
-            if (IsEdge(*itt1, *itt2) == true) {
-              //	cout << "yes, an edge\n";
-              // merge it
-              fCont = true;
-              set<int> snew = *it;
-              UnionSets(snew, *it2);
-              // UnionSets(*it, *it2);
-              comps.erase(*it);
-              comps.erase(it2);
-              comps.insert(snew);
-              break;
+                // see if these two should be merged
+                for (set<int>::iterator itt1 = (*it).begin(); itt1 != (*it).end(); ++itt1)
+                {
+                    for (set<int>::iterator itt2 = (*it2).begin(); itt2 != (*it2).end(); ++itt2)
+                    {
+                        //	cout << "itt1 = " << *itt1 << ", itt2 = " << *itt2 << endl;
+                        // are these two connected?
+                        if (IsEdge(*itt1, *itt2) == true)
+                        {
+                            //	cout << "yes, an edge\n";
+                            // merge it
+                            fCont = true;
+                            set<int> snew = *it;
+                            UnionSets(snew, *it2);
+                            //UnionSets(*it, *it2);
+                            comps.erase(*it);
+                            comps.erase(it2);
+                            comps.insert(snew);
+                            break;
+                        }
+                    }
+                    if (fCont == true)
+                    {
+                        break;
+                    }
+                }
+                if (fCont == true)
+                {
+                    break;
+                }
+            }
+            if (fCont == true)
+            {
+                break;
             }
-          }
-          if (fCont == true) {
-            break;
-          }
-        }
-        if (fCont == true) {
-          break;
         }
-      }
-      if (fCont == true) {
-        break;
-      }
     }
-  }
-  // cout << "Components = \n";
-  // for( set< set<int> > :: iterator it = comps.begin(); it != comps.end();
-  // ++it)
-  //{
-  // DumpIntSet( *it );
-  //}
+    //cout << "Components = \n";
+    //for( set< set<int> > :: iterator it = comps.begin(); it != comps.end(); ++it)
+    //{
+    //DumpIntSet( *it );
+    //}
 }
 
 // ***************************************************************************
 // directed graph
 // ***************************************************************************
 
-bool DirectedGraph ::AddEdge(int vid1, int vid2, int val) {
-  // Here vid1 is source, vid2 is dest
+bool DirectedGraph ::AddEdge(int vid1, int vid2, int val)
+{
+    // Here vid1 is source, vid2 is dest
 
-  // first make sure the graph is valid
-  if (vertices.find(vid1) == vertices.end() ||
-      vertices.find(vid2) == vertices.end()) {
-    return false;
-  }
+    // first make sure the graph is valid
+    if (vertices.find(vid1) == vertices.end() || vertices.find(vid2) == vertices.end())
+    {
+        return false;
+    }
 
-  // Then we add it to the adjaceny list
-  GraphEdge e(vid1, vid2, val);
-  if (adjacencyList.find(vid1) == adjacencyList.end()) {
-    EDGE_LIST el;
-    adjacencyList.insert(map<int, EDGE_LIST>::value_type(vid1, el));
-  }
-  adjacencyList[vid1].push_back(e);
+    // Then we add it to the adjacency list
+    GraphEdge e(vid1, vid2, val);
+    if (adjacencyList.find(vid1) == adjacencyList.end())
+    {
+        EDGE_LIST el;
+        adjacencyList.insert(map<int, EDGE_LIST>::value_type(vid1, el));
+    }
+    adjacencyList[vid1].push_back(e);
 
-  return true;
-}
-bool DirectedGraph ::IsNodeSink(int vid) {
-  // is this node a sink (i.e. no outgoing arcs?)
-  // YW_ASSERT_INFO(  );
-  if (adjacencyList.find(vid) == adjacencyList.end()) {
-    return true;
-  }
-  // otherwise, if the list is empty, also a sink
-  if (adjacencyList[vid].size() == 0) {
     return true;
-  }
-  return false;
+}
+bool DirectedGraph ::IsNodeSink(int vid)
+{
+    // is this node a sink (i.e. no outgoing arcs?)
+    //YW_ASSERT_INFO(  );
+    if (adjacencyList.find(vid) == adjacencyList.end())
+    {
+        return true;
+    }
+    // otherwise, if the list is empty, also a sink
+    if (adjacencyList[vid].size() == 0)
+    {
+        return true;
+    }
+    return false;
 }
 
-bool DirectedGraph ::IsNodeSource(int vid) {
-  // check all adj list to see if anyone point to it
-  for (map<int, EDGE_LIST>::const_iterator it = adjacencyList.begin();
-       it != adjacencyList.end(); ++it) {
-    for (int i = 0; i < (int)it->second.size(); ++i) {
-      GraphEdge e = it->second[i];
-      int v1, v2;
-      e.GetVertexIDs(v1, v2);
-      if (v2 == vid) {
-        return false;
-      }
+bool DirectedGraph ::IsNodeSource(int vid)
+{
+    // scan every adjacency list to see whether any edge points to this node
+    for (map<int, EDGE_LIST>::const_iterator it = adjacencyList.begin(); it != adjacencyList.end(); ++it)
+    {
+        for (int i = 0; i < (int)it->second.size(); ++i)
+        {
+            GraphEdge e = it->second[i];
+            int v1, v2;
+            e.GetVertexIDs(v1, v2);
+            if (v2 == vid)
+            {
+                return false;
+            }
+        }
     }
-  }
-  return true;
+    return true;
 }
 
-void DirectedGraph ::OutputGML(const char *fileName) {
-  // Now output a file in GML format
-  // First create a new name
-  string name = fileName;
-  // cout << "num edges = " << listEdges.size() << endl;
-
-  DEBUG("FileName=");
-  DEBUG(name);
-  DEBUG("\n");
-  // Now open file to write out
-  ofstream outFile(name.c_str());
-
-  // First output some header info
-  outFile << "graph [\n";
-  outFile << "comment ";
-  OutputQuotedString(outFile, "Automatically generated by Graphing tool");
-  outFile << "\ndirected  1\n";
-  outFile << "id  1\n";
-  outFile << "label ";
-  OutputQuotedString(outFile, "To be more meaningful later....\n");
-
-  // Now output all the vertices
-  // int i;
-  for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    outFile << "node [\n";
-    const char *name = it->second.GetLabel().c_str();
-    // the name is equal to it
-    //		name[0] = 'v';
-    //		sprintf(&name[1], "%d", i+1);
-    outFile << "id " << it->first << endl;
+void DirectedGraph ::OutputGML(const char *fileName)
+{
+    // Now output a file in GML format
+    // First create a new name
+    string name = fileName;
+    //cout << "num edges = " << listEdges.size() << endl;
+
+    DEBUG("FileName=");
+    DEBUG(name);
+    DEBUG("\n");
+    // Now open file to write out
+    ofstream outFile(name.c_str());
+
+    // First output some header info
+    outFile << "graph [\n";
+    outFile << "comment ";
+    OutputQuotedString(outFile, "Automatically generated by Graphing tool");
+    outFile << "\ndirected  1\n";
+    outFile << "id  1\n";
     outFile << "label ";
-    OutputQuotedString(outFile, name);
-    outFile << endl;
-    outFile << "defaultAtrribute   1\n";
-    outFile << "]\n";
-  }
-
-  // Now output all the edges
-  for (map<int, EDGE_LIST>::const_iterator it = adjacencyList.begin();
-       it != adjacencyList.end(); ++it) {
-    // Output for each id
-    for (int i = 0; i < (int)it->second.size(); ++i) {
-      GraphEdge e = it->second[i];
-      const char *name = e.GetLabel().c_str();
-      int v1, v2;
-      e.GetVertexIDs(v1, v2);
-
-      outFile << "edge [\n";
-      outFile << "source " << v1 << endl;
-      outFile << "target  " << v2 << endl;
-      outFile << "label ";
-      // cout << "edge label = " << name << endl;
-      OutputQuotedString(outFile, name);
-      outFile << "\n";
-      outFile << "]\n";
+    OutputQuotedString(outFile, "To be more meaningful later....\n");
+
+    // Now output all the vertices
+    //int i;
+    for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        outFile << "node [\n";
+        //const char *name = it->second.GetLabel().c_str();
+        // the name is equal to it
+        //		name[0] = 'v';
+        //		sprintf(&name[1], "%d", i+1);
+        outFile << "id " << it->first << endl;
+        outFile << "label ";
+        OutputQuotedString(outFile, (const char *)it->second.GetLabel().c_str());
+        outFile << endl;
+        outFile << "defaultAtrribute   1\n";
+        outFile << "]\n";
     }
-  }
 
-#if 0
-	for(int i=0; i< numVerts; ++i )
-	{
-		for(int j=i+1; j<numVerts; ++j)
-		{
-			if( IsNeighour ( i, j) )
-			{
-
-//cout << "Output an edge \n";
-				outFile << "edge [\n";
-				outFile << "source " << i+1 << endl;
-				outFile << "target  " << j+1 << endl;
-				outFile << "label " ;
-				OutputQuotedString( outFile,  ""  );
-				outFile << "\n";
-				outFile << "]\n";
-			}
-		}
-	}
-#endif
+    // Now output all the edges
+    for (map<int, EDGE_LIST>::const_iterator it = adjacencyList.begin(); it != adjacencyList.end(); ++it)
+    {
+        // Output for each id
+        for (int i = 0; i < (int)it->second.size(); ++i)
+        {
+            GraphEdge e = it->second[i];
+            //const char *name = e.GetLabel().c_str();
+            int v1, v2;
+            e.GetVertexIDs(v1, v2);
+
+            outFile << "edge [\n";
+            outFile << "source " << v1 << endl;
+            outFile << "target  " << v2 << endl;
+            outFile << "label ";
+            //cout << "edge label = " << name << endl;
+            OutputQuotedString(outFile, (const char *)e.GetLabel().c_str());
+            outFile << "\n";
+            outFile << "]\n";
+        }
+    }
 
-  // Finally quite after closing file
-  outFile << "\n]\n";
-  outFile.close();
+    // Finally, write the closing bracket and close the file
+    outFile << "\n]\n";
+    outFile.close();
 }
 
-void DirectedGraph::DFSVisitAcyclic(int nid, int &time,
-                                    map<int, int> &nodesColor,
-                                    map<int, int> &nodesdval,
-                                    map<int, int> &nodesfval,
-                                    vector<int> *plistFinishedNodes) {
-  // visit the
-  nodesColor[nid] = 1;
-  time++;
-  nodesdval[nid] = time;
-  // cout << "nid " << nid << ", D time = " << time << endl;
-  for (int ii = 0; ii < (int)adjacencyList[nid].size(); ++ii) {
+void DirectedGraph::DFSVisitAcyclic(int nid, int &time, map<int, int> &nodesColor, map<int, int> &nodesdval, map<int, int> &nodesfval, vector<int> *plistFinishedNodes)
+{
+    // visit the node: set its color to 1 (in progress) and record its start time
+    nodesColor[nid] = 1;
+    time++;
+    nodesdval[nid] = time;
+    //cout << "nid " << nid << ", D time = " << time << endl;
+    for (int ii = 0; ii < (int)adjacencyList[nid].size(); ++ii)
+    {
+        //
+        int v1, v2;
+        adjacencyList[nid][ii].GetVertexIDs(v1, v2);
+        YW_ASSERT_INFO(v1 == nid, "wrong here");
+        //stackVisitedDFS.push( v2 );
+        if (nodesColor[v2] == 0)
+        {
+            DFSVisitAcyclic(v2, time, nodesColor, nodesdval, nodesfval, plistFinishedNodes);
+        }
+    }
     //
-    int v1, v2;
-    adjacencyList[nid][ii].GetVertexIDs(v1, v2);
-    YW_ASSERT_INFO(v1 == nid, "wrong here");
-    // stackVisitedDFS.push( v2 );
-    if (nodesColor[v2] == 0) {
-      DFSVisitAcyclic(v2, time, nodesColor, nodesdval, nodesfval,
-                      plistFinishedNodes);
+    nodesColor[nid] = 2;
+    time++;
+    nodesfval[nid] = time;
+
+    if (plistFinishedNodes != NULL)
+    {
+        plistFinishedNodes->push_back(nid);
     }
-  }
-  //
-  nodesColor[nid] = 2;
-  time++;
-  nodesfval[nid] = time;
-
-  if (plistFinishedNodes != NULL) {
-    plistFinishedNodes->push_back(nid);
-  }
-  // cout << "nid " << nid << ", F time = " << time << endl;
+    //cout << "nid " << nid << ", F time = " << time << endl;
 }
 
-bool DirectedGraph ::IsAcyclic() {
-  // for each node with in-degree 0, do a DFS search
-  map<int, int> nodesColor;
-  map<int, int> nodesdval;
-  map<int, int> nodesfval;
-  for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    nodesColor.insert(map<int, int>::value_type(it->first, 0)); // un-visited
-    nodesdval.insert(map<int, int>::value_type(it->first, 0));
-    nodesfval.insert(map<int, int>::value_type(it->first, 0));
-  }
-  int time = 0;
-  for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    if (nodesColor[it->first] == 0) {
-      DFSVisitAcyclic(it->first, time, nodesColor, nodesdval, nodesfval);
+bool DirectedGraph ::IsAcyclic()
+{
+    // run a DFS from every vertex that has not been visited yet
+    map<int, int> nodesColor;
+    map<int, int> nodesdval;
+    map<int, int> nodesfval;
+    for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        nodesColor.insert(map<int, int>::value_type(it->first, 0)); // un-visited
+        nodesdval.insert(map<int, int>::value_type(it->first, 0));
+        nodesfval.insert(map<int, int>::value_type(it->first, 0));
     }
-  }
-  // check each arc
-  // if see an arc with src's time interval is contained inside dest's interval,
-  // then cycle!
-  for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    int nodeid = it->first;
-    for (int ii = 0; ii < (int)adjacencyList[nodeid].size(); ++ii) {
-      //
-      int v1, v2;
-      adjacencyList[nodeid][ii].GetVertexIDs(v1, v2);
-      YW_ASSERT_INFO(v1 == nodeid, "wrong here");
-      // cout << "nid = " << nodeid << ", v2 = " << v2 << ", d1 = " <<
-      // nodesdval[nodeid] << "f1 = "; cout << nodesfval[nodeid] <<", d2 = " <<
-      // nodesdval[v2]  << ", f2 = " << nodesfval[v2] << endl;
-      if (nodesdval[nodeid] > nodesdval[v2] &&
-          nodesfval[nodeid] < nodesfval[v2]) {
-        // cout << "Cycle here!\n";
-        return false;
-      }
+    int time = 0;
+    for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        if (nodesColor[it->first] == 0)
+        {
+            DFSVisitAcyclic(it->first, time, nodesColor, nodesdval, nodesfval);
+        }
     }
-  }
-  // test whether DFS has inconsistency
-  return true;
+    // check each arc
+    // if an arc's source time interval is contained inside its destination's interval, there is a cycle
+    for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        int nodeid = it->first;
+        for (int ii = 0; ii < (int)adjacencyList[nodeid].size(); ++ii)
+        {
+            //
+            int v1, v2;
+            adjacencyList[nodeid][ii].GetVertexIDs(v1, v2);
+            YW_ASSERT_INFO(v1 == nodeid, "wrong here");
+            //cout << "nid = " << nodeid << ", v2 = " << v2 << ", d1 = " << nodesdval[nodeid] << "f1 = ";
+            //cout << nodesfval[nodeid] <<", d2 = " << nodesdval[v2]  << ", f2 = " << nodesfval[v2] << endl;
+            if (nodesdval[nodeid] > nodesdval[v2] && nodesfval[nodeid] < nodesfval[v2])
+            {
+                //cout << "Cycle here!\n";
+                return false;
+            }
+        }
+    }
+    // test whether DFS has inconsistency
+    return true;
 
 #if 0
     // start from every node
@@ -1000,53 +1068,59 @@ cout << "push stack v2 = " << v2 << endl;
 #endif
 }
 
-void DirectedGraph ::TrimTreeArcs() {
-  // recursivly remove all nodes as sinks
-  // loop to find one sink and remove it and start-over
-  while (true) {
-    // stop when the number of vertices is not very large
-    if (GetNumVertices() < 2) {
-      break;
-    }
+void DirectedGraph ::TrimTreeArcs()
+{
+    // repeatedly remove all nodes that are sinks or sources
+    // loop to find one sink and remove it and start-over
+    while (true)
+    {
+        // stop once fewer than two vertices remain
+        if (GetNumVertices() < 2)
+        {
+            break;
+        }
 
-    bool fFound = false;
-    for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-         it != vertices.end(); ++it) {
-      if (IsNodeSink(it->first) == true || IsNodeSource(it->first) == true) {
-        RemoveVertex(it->first);
-        fFound = true;
-        break;
-      }
-      // also reduce pure source nodes (e.g. no incoming edges)
-    }
-    if (fFound == false) {
-      break;
+        bool fFound = false;
+        for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+        {
+            if (IsNodeSink(it->first) == true || IsNodeSource(it->first) == true)
+            {
+                RemoveVertex(it->first);
+                fFound = true;
+                break;
+            }
+            // also reduce pure source nodes (e.g. no incoming edges)
+        }
+        if (fFound == false)
+        {
+            break;
+        }
     }
-  }
 }
 
-void DirectedGraph ::TopologicalSort(vector<int> &listNodesFinished) {
-  //
-  // for each node with in-degree 0, do a DFS search
-  map<int, int> nodesColor;
-  map<int, int> nodesdval;
-  map<int, int> nodesfval;
-  for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    nodesColor.insert(map<int, int>::value_type(it->first, 0)); // un-visited
-    nodesdval.insert(map<int, int>::value_type(it->first, 0));
-    nodesfval.insert(map<int, int>::value_type(it->first, 0));
-  }
-  // vector<int> listNodesFinished;
-  int time = 0;
-  for (map<int, GraphVertex>::const_iterator it = vertices.begin();
-       it != vertices.end(); ++it) {
-    if (nodesColor[it->first] == 0) {
-      DFSVisitAcyclic(it->first, time, nodesColor, nodesdval, nodesfval,
-                      &listNodesFinished);
+void DirectedGraph ::TopologicalSort(vector<int> &listNodesFinished)
+{
+    //
+    // run a DFS from every vertex that has not been visited yet
+    map<int, int> nodesColor;
+    map<int, int> nodesdval;
+    map<int, int> nodesfval;
+    for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        nodesColor.insert(map<int, int>::value_type(it->first, 0)); // un-visited
+        nodesdval.insert(map<int, int>::value_type(it->first, 0));
+        nodesfval.insert(map<int, int>::value_type(it->first, 0));
+    }
+    //vector<int> listNodesFinished;
+    int time = 0;
+    for (map<int, GraphVertex>::const_iterator it = vertices.begin(); it != vertices.end(); ++it)
+    {
+        if (nodesColor[it->first] == 0)
+        {
+            DFSVisitAcyclic(it->first, time, nodesColor, nodesdval, nodesfval, &listNodesFinished);
+        }
     }
-  }
 
-  //
-  ReverseIntVec(listNodesFinished);
+    //
+    ReverseIntVec(listNodesFinished);
 }
diff --git a/trisicell/external/scistree/UnWeightedGraph.h b/trisicell/external/scistree/UnWeightedGraph.h
index fb24d26..eea7410 100644
--- a/trisicell/external/scistree/UnWeightedGraph.h
+++ b/trisicell/external/scistree/UnWeightedGraph.h
@@ -1,11 +1,11 @@
 #ifndef UNWEIGHTED_GRAPH_H
 #define UNWEIGHTED_GRAPH_H
 
-#include <iostream>
 #include <list>
+#include <vector>
 #include <set>
 #include <string>
-#include <vector>
+#include <iostream>
 //#include <limits>
 using namespace std;
 
@@ -16,45 +16,47 @@ void OutputQuotedString(ofstream &outFile, const char *buf);
 // ***************************************************************************
 // Buneman graph utilities
 // ***************************************************************************
-class BGVertex {
-  friend class BunemanGraph;
-  friend class BGEdge;
-  friend class UnWeightedGraph;
+class BGVertex
+{
+    friend class BunemanGraph;
+    friend class BGEdge;
+    friend class UnWeightedGraph;
 
 public:
-  BGVertex() : id(0), speciesID("") {}
-  BGVertex(const string &nm) : name(nm), id(0), speciesID("") {}
-  void AddBlock(bool blk) { blocks.push_back(blk); }
-  void SetSpeciesID(const string &id) { speciesID = id; }
-  friend ostream &operator<<(ostream &out, const BGVertex &v);
+    BGVertex() : id(0), speciesID("") {}
+    BGVertex(const string &nm) : name(nm), id(0), speciesID("") {}
+    void AddBlock(bool blk) { blocks.push_back(blk); }
+    void SetSpeciesID(const string &id) { speciesID = id; }
+    friend ostream &operator<<(ostream &out, const BGVertex &v);
 
 private:
-  string name; // name of vertex, such as "v1"
-  int id;      // unique name for node
-  string speciesID;
-  vector<bool> blocks; // false: first block, true: second block
+    string name; // name of vertex, such as "v1"
+    int id;      // unique name for node
+    string speciesID;
+    vector<bool> blocks; // false: first block, true: second block
 };
 typedef vector<BGVertex> LIST_VERTEX;
 
-class BGEdge {
-  friend class BunemanGraph;
-  friend class UnWeightedGraph;
+class BGEdge
+{
+    friend class BunemanGraph;
+    friend class UnWeightedGraph;
 
 public:
-  BGEdge() : v1Pos(-1), v2Pos(-1) { pv1 = pv2 = NULL; }
-  BGEdge(string nm) : name(nm), v1Pos(-1), v2Pos(-1) { pv1 = pv2 = NULL; }
-  BGEdge(string nm, int v1p, int v2p, LIST_VERTEX &listVerts)
-      : name(nm), v1Pos(v1p), v2Pos(v2p) {
-    pv1 = &listVerts[v1p];
-    pv2 = &listVerts[v2p];
-  }
+    BGEdge() : v1Pos(-1), v2Pos(-1) { pv1 = pv2 = NULL; }
+    BGEdge(string nm) : name(nm), v1Pos(-1), v2Pos(-1) { pv1 = pv2 = NULL; }
+    BGEdge(string nm, int v1p, int v2p, LIST_VERTEX &listVerts) : name(nm), v1Pos(v1p), v2Pos(v2p)
+    {
+        pv1 = &listVerts[v1p];
+        pv2 = &listVerts[v2p];
+    }
 
 private:
-  string name; // name of edge, such as "s1"
-  int v1Pos;
-  int v2Pos;
-  BGVertex *pv1; // end vertex1 of edge
-  BGVertex *pv2; // end vertex2 of edge
+    string name; // name of edge, such as "s1"
+    int v1Pos;
+    int v2Pos;
+    BGVertex *pv1; // end vertex1 of edge
+    BGVertex *pv2; // end vertex2 of edge
 };
 
 // ***************************************************************************
@@ -63,108 +65,117 @@ typedef set<int> INCOMPATIBLE_SET; // each integer is for indexing
                                    // into matrix columns
 typedef set<int> SPLIT_BLOCK_SET;
 typedef list<INCOMPATIBLE_SET> INC_CLIQUE_LIST;
-// typedef vector<int> SEQUENCE;			// represent sequence of
-// bits
+//typedef vector<int> SEQUENCE;			// represent sequence of bits
 
 #define NIL_VERTEX 0x7FFFFFFF
 
 // ***************************************************************************
 // UnWightedGraph class
 // ***************************************************************************
-class UnWeightedGraph {
+class UnWeightedGraph
+{
 public:
-  UnWeightedGraph() {}
-  UnWeightedGraph(LIST_VERTEX &listVerts, LIST_EDGE &listEs)
-      : listVertices(listVerts), listEdges(listEs) {}
-  ~UnWeightedGraph() {}
-  void SetVertices(LIST_VERTEX &verts) { listVertices = verts; }
-  void SetEdges(LIST_EDGE &edges) { listEdges = edges; }
-  int GetNumVertices() const { return listVertices.size(); }
-  int GetAdjVert(int src, int lastAdj);
-  bool IsConnected();
-  void OutputGML(const char *fileName);
-  bool IsNeighour(int i, int j);
-  LIST_VERTEX &GetListVerts() { return listVertices; }
+    UnWeightedGraph() {}
+    UnWeightedGraph(LIST_VERTEX &listVerts, LIST_EDGE &listEs) : listVertices(listVerts),
+                                                                 listEdges(listEs) {}
+    ~UnWeightedGraph() {}
+    void SetVertices(LIST_VERTEX &verts) { listVertices = verts; }
+    void SetEdges(LIST_EDGE &edges) { listEdges = edges; }
+    int GetNumVertices() const { return listVertices.size(); }
+    int GetAdjVert(int src, int lastAdj);
+    bool IsConnected();
+    void OutputGML(const char *fileName);
+    bool IsNeighour(int i, int j);
+    LIST_VERTEX &GetListVerts() { return listVertices; }
 
 private:
-  LIST_VERTEX listVertices;
-  LIST_EDGE listEdges;
+    LIST_VERTEX listVertices;
+    LIST_EDGE listEdges;
 };
 
 // ***************************************************************************
 // UndirectedGraph class
 // ***************************************************************************
-class GraphVertex {
+class GraphVertex
+{
 public:
-  GraphVertex() {
-    value = id = 0;
-    visited = false;
-  }
-  GraphVertex(int id1) : visited(false) { id = id1; }
-  GraphVertex(int id1, int val) : visited(false) {
-    id = id1;
-    value = val;
-  }
-  GraphVertex(const GraphVertex &rhs) {
-    value = rhs.value;
-    id = rhs.id;
-    visited = rhs.visited;
-  }
-  //  GraphVertex&  operator=(const GraphVertex &rhs) {value = rhs.value; id =
-  //  rhs.id; visited = rhs.visited; return this;}
-
-  void SetVisited(bool f) { visited = f; }
-  bool IsVisited() { return visited; }
-  int GetID() { return id; }
-  void SetValue(int v) { value = v; }
-  int GetValue() { return value; }
-  string GetLabel() const { return label; }
-  void SetLabel(string lbl) { label = lbl; }
+    GraphVertex()
+    {
+        value = id = 0;
+        visited = false;
+    }
+    GraphVertex(int id1) : visited(false) { id = id1; }
+    GraphVertex(int id1, int val) : visited(false)
+    {
+        id = id1;
+        value = val;
+    }
+    GraphVertex(const GraphVertex &rhs)
+    {
+        value = rhs.value;
+        id = rhs.id;
+        visited = rhs.visited;
+    }
+    //  GraphVertex&  operator=(const GraphVertex &rhs) {value = rhs.value; id = rhs.id; visited = rhs.visited; return this;}
+
+    void SetVisited(bool f) { visited = f; }
+    bool IsVisited() { return visited; }
+    int GetID() { return id; }
+    void SetValue(int v) { value = v; }
+    int GetValue() { return value; }
+    string GetLabel() const { return label; }
+    void SetLabel(string lbl) { label = lbl; }
 
 private:
-  int value; // a vertex can have a value
-  int id;    // id is unique, when removal no reuse
-  bool visited;
-  string label;
+    int value; // a vertex can have a value
+    int id;    // id is unique, when removal no reuse
+    bool visited;
+    string label;
 };
 
-class GraphEdge {
+class GraphEdge
+{
 public:
-  GraphEdge() {
-    vid1 = -1;
-    vid2 = -1;
-    value = -1;
-  }
-  GraphEdge(int id1, int id2) {
-    vid1 = id1;
-    vid2 = id2;
-  }
-  GraphEdge(int id1, int id2, int v) {
-    vid1 = id1;
-    vid2 = id2;
-    value = v;
-  }
-  GraphEdge(const GraphEdge &rhs) {
-    vid1 = rhs.vid1;
-    vid2 = rhs.vid2;
-    value = rhs.value;
-    label = rhs.label;
-  }
-
-  void GetVertexIDs(int &v1, int &v2) {
-    v1 = vid1;
-    v2 = vid2;
-  }
-  int GetValue() { return value; }
-  void SetValue(int v) { value = v; }
-  string GetLabel() const { return label; }
-  void SetLabel(string lbl) { label = lbl; }
+    GraphEdge()
+    {
+        vid1 = -1;
+        vid2 = -1;
+        value = -1;
+    }
+    GraphEdge(int id1, int id2)
+    {
+        vid1 = id1;
+        vid2 = id2;
+    }
+    GraphEdge(int id1, int id2, int v)
+    {
+        vid1 = id1;
+        vid2 = id2;
+        value = v;
+    }
+    GraphEdge(const GraphEdge &rhs)
+    {
+        vid1 = rhs.vid1;
+        vid2 = rhs.vid2;
+        value = rhs.value;
+        label = rhs.label;
+    }
+
+    void GetVertexIDs(int &v1, int &v2)
+    {
+        v1 = vid1;
+        v2 = vid2;
+    }
+    int GetValue() { return value; }
+    void SetValue(int v) { value = v; }
+    string GetLabel() const { return label; }
+    void SetLabel(string lbl) { label = lbl; }
 
 private:
-  int vid1;     // id of the vertex #1
-  int vid2;     // id of vertex #2
-  int value;    // an edge can have a value
-  string label; // label of the edge
+    int vid1;     // id of the vertex #1
+    int vid2;     // id of vertex #2
+    int value;    // an edge can have a value
+    string label; // label of the edge
 };
 
 #if 0
@@ -185,107 +196,106 @@ typedef int GRAPH_TRAV_POSITION;
 // ***************************************************************************
 // This is a hopefully generic class for directed graph
 // we store, at each vertex, the list of
-class GenericGraph {
+class GenericGraph
+{
 
 public:
-  typedef vector<GraphEdge> EDGE_LIST;
-
-  GenericGraph();
-  virtual ~GenericGraph() {}
-
-  // Basic graph functions
-  virtual int AddVertex(int val);
-  virtual bool RemoveVertex(int id);
-  virtual bool AddEdge(int vid1, int vid2, int val) = 0;
-  int GetNumVertices() const { return vertices.size(); }
-  virtual int GetNumEdges() const;
-  virtual int GetEdgeNum(int vid);
-  virtual GraphEdge *GetEdgeByIndex(int vid, int index);
-  virtual GraphEdge *GetEdge(int vid, int uid);
-  virtual bool IsEdge(int vid, int uid);
-  virtual bool FindVertexByID(int id, GraphVertex &v);
-  virtual GraphVertex *FindVertex(int id);
-  virtual void SetVertexVisited(int vid, bool flag);
-  virtual bool IsVertexVisited(int vid);
-  virtual void SetVertexLabel(int vid, string lbl);
-  virtual GraphVertex *GetVertexByLabel(string lbl);
-  virtual void SetEdgeLabel(int vid1, int vid2, string lbl);
+    typedef vector<GraphEdge> EDGE_LIST;
+
+    GenericGraph();
+    virtual ~GenericGraph() {}
+
+    // Basic graph functions
+    virtual int AddVertex(int val);
+    virtual bool RemoveVertex(int id);
+    virtual bool AddEdge(int vid1, int vid2, int val) = 0;
+    int GetNumVertices() const { return vertices.size(); }
+    virtual int GetNumEdges() const;
+    virtual int GetEdgeNum(int vid);
+    virtual GraphEdge *GetEdgeByIndex(int vid, int index);
+    virtual GraphEdge *GetEdge(int vid, int uid);
+    virtual bool IsEdge(int vid, int uid);
+    virtual bool FindVertexByID(int id, GraphVertex &v);
+    virtual GraphVertex *FindVertex(int id);
+    virtual void SetVertexVisited(int vid, bool flag);
+    virtual bool IsVertexVisited(int vid);
+    virtual void SetVertexLabel(int vid, string lbl);
+    virtual GraphVertex *GetVertexByLabel(string lbl);
+    virtual void SetEdgeLabel(int vid1, int vid2, string lbl);
 
 protected:
-  map<int, GraphVertex> vertices; // for faster access, use a map, indexed by id
-  map<int, EDGE_LIST> adjacencyList; // Indexed by id of the vertex
-  int nextId;
+    map<int, GraphVertex> vertices;    // for faster access, use a map, indexed by id
+    map<int, EDGE_LIST> adjacencyList; // Indexed by id of the vertex
+    int nextId;
 };
 
-class UndirectedGraph : public GenericGraph {
-  //    typedef vector<GraphEdge> EDGE_LIST;
+class UndirectedGraph : public GenericGraph
+{
+    //    typedef vector<GraphEdge> EDGE_LIST;
 public:
-  UndirectedGraph();
-  virtual ~UndirectedGraph();
-
-  // Basic graph functions
-  bool AddEdge(int vid1, int vid2, int val);
-  int GetNumEdges() const;
-
-  // bool GetFirstEdge (int vid, GraphEdge &e);
-  // bool GetNextEdge( int vid, GraphEdge &e );
-
-  GraphEdge *GetEdge(int vid, int uid);
-  int GetFirstNode(GraphVertex &v); // return -1 when done
-  int GetNextNode(GraphVertex &v);  // return id of the node
-
-  // Some traversal utility here, need improvements later
-  void InitTraversal(); // Set visited flag to false
-  int TraversalFrom(int id, set<int> &listOfCCVertices); // Start traversal from
-                                                         // node's id = id
-  int FindUnvisitedNode(); // return the id of an unvisited node, if none return
-                           // -1
-
-  // Some basic functions here
-  bool IsBipartitie();
-  void FindComponents(set<set<int> > &comps);
-
-  // DFS functions
-  void InitPrevConfig();
-  void DFSSetPrevNode(int u, int uprev);
-  int DFSGetPrevNode(int u);
-  void DFSSetNextNode(int u, int unext);
-  int DFSGetNextNode(int u);
+    UndirectedGraph();
+    virtual ~UndirectedGraph();
+
+    // Basic graph functions
+    bool AddEdge(int vid1, int vid2, int val);
+    int GetNumEdges() const;
+
+    //bool GetFirstEdge (int vid, GraphEdge &e);
+    //bool GetNextEdge( int vid, GraphEdge &e );
+
+    GraphEdge *GetEdge(int vid, int uid);
+    int GetFirstNode(GraphVertex &v); // return -1 when done
+    int GetNextNode(GraphVertex &v);  // return id of the node
+
+    // Some traversal utility here, need improvements later
+    void InitTraversal();                                  // Set visited flag to false
+    int TraversalFrom(int id, set<int> &listOfCCVertices); // Start traversal from node's id = id
+    int FindUnvisitedNode();                               // return the id of an unvisited node, if none return -1
+
+    // Some basic functions here
+    bool IsBipartitie();
+    void FindComponents(set<set<int>> &comps);
+
+    // DFS functions
+    void InitPrevConfig();
+    void DFSSetPrevNode(int u, int uprev);
+    int DFSGetPrevNode(int u);
+    void DFSSetNextNode(int u, int unext);
+    int DFSGetNextNode(int u);
 
 protected:
-  // map<int, GraphVertex> vertices;      // for faster access, use a map,
-  // indexed by id map<int, EDGE_LIST> adjacencyList;  // Indexed by id of the
-  // vertex int nextId;
+    //map<int, GraphVertex> vertices;      // for faster access, use a map, indexed by id
+    //map<int, EDGE_LIST> adjacencyList;  // Indexed by id of the vertex
+    //int nextId;
 
-  // private member here
+    // private member here
 private:
-  // Mainly used temporiliy for traversal
-  map<int, int> prevMap;
-  map<int, int> nextMap; // not really neccessary, but to make it simple....
-  map<int, GraphVertex>::iterator itCurrent;
-  //    GRAPH_TRAV_POSITION                     curpos;
-  int *prevArray;
+    // Mainly used temporarily for traversal
+    map<int, int> prevMap;
+    map<int, int> nextMap; // not really neccessary, but to make it simple....
+    map<int, GraphVertex>::iterator itCurrent;
+    //    GRAPH_TRAV_POSITION                     curpos;
+    int *prevArray;
 };
 
 // This is a hopefully generic class for directed graph
 // we store, at each vertex, the list of
-class DirectedGraph : public GenericGraph {
+class DirectedGraph : public GenericGraph
+{
 public:
-  // Basic graph functions
-  bool AddEdge(int vid1, int vid2, int val);
-  bool IsNodeSink(int vid);
-  bool IsNodeSource(int vid);
-  bool IsAcyclic();
-  void TopologicalSort(vector<int> &listNodesIds);
+    // Basic graph functions
+    bool AddEdge(int vid1, int vid2, int val);
+    bool IsNodeSink(int vid);
+    bool IsNodeSource(int vid);
+    bool IsAcyclic();
+    void TopologicalSort(vector<int> &listNodesIds);
 
-  // Output
-  void OutputGML(const char *fileName);
-  void TrimTreeArcs(); // recursivly remove all nodes as sinks
+    // Output
+    void OutputGML(const char *fileName);
+    void TrimTreeArcs(); // recursivly remove all nodes as sinks
 
 private:
-  void DFSVisitAcyclic(int nid, int &time, map<int, int> &nodesColor,
-                       map<int, int> &nodesdval, map<int, int> &nodesfval,
-                       vector<int> *plistFinishedNodes = NULL);
+    void DFSVisitAcyclic(int nid, int &time, map<int, int> &nodesColor, map<int, int> &nodesdval, map<int, int> &nodesfval, vector<int> *plistFinishedNodes = NULL);
 };
 
-#endif // UNWEIGHTED_GRAPH_H
+#endif //UNWEIGHTED_GRAPH_H
diff --git a/trisicell/external/scistree/Utils.cpp b/trisicell/external/scistree/Utils.cpp
index 8029a68..b86de1a 100644
--- a/trisicell/external/scistree/Utils.cpp
+++ b/trisicell/external/scistree/Utils.cpp
@@ -1,19 +1,22 @@
 #include "Utils.h"
-#include "cstdio"
-#include "cstdlib"
 #include "ctime"
+#include "cstdlib"
+#include "cstdio"
 
-//////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
 // Potential Junk, save it here
-//////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
 
-static void MakeComplementSet(int start, int end, const set<int> &origSet,
-                              set<int> &compSet) {
-  for (int i = start; i <= end; ++i) {
-    if (origSet.find(i) == origSet.end()) {
-      compSet.insert(i);
-    }
-  }
+static void MakeComplementSet( int start, int end,
+							  const set<int> &origSet, set<int> &compSet )
+{
+	for(int i=start; i<= end; ++i)
+	{
+		if( origSet.find( i ) == origSet.end() )
+		{
+			compSet.insert( i);
+		}
+	}
 }
 
 #if 0
@@ -27,377 +30,479 @@ static long ComputeCombNum(int n, int k)
 }
 #endif
 
-//////////////////////////////////////////////////////////////////////////////
-void SubtractSets(set<int> &s1, const set<int> &s2) {
-  if (s2.size() == 0) {
-    return;
-  }
-  set<int> res;
-  // this function performs set intersection, i.e. s1=s1 ^s2
-  for (set<int>::iterator it = s1.begin(); it != s1.end(); ++it) {
-    if (s2.find(*it) == s2.end()) {
-      res.insert(*it);
+/////////////////////////////////////////////////////////////////////////////////////////
+void SubtractSets( set<int> &s1, const set<int> &s2)
+{
+    if( s2.size() == 0)
+    {
+        return;
     }
-  }
-  s1.clear();
-  s1 = res;
+    set<int> res;
+	// this function performs set difference, i.e. s1 = s1 - s2 (keeps only elements of s1 not in s2)
+	for( set<int>::iterator it = s1.begin(); it!= s1.end(); ++it)
+	{
+		if( s2.find( *it ) == s2.end() )
+		{
+			res.insert( *it );
+		}
+	}
+    s1.clear();
+    s1 = res;
 }
 
-void JoinSets(const set<int> &s1, const set<int> &s2, set<int> &res) {
+void JoinSets( const set<int> &s1, const set<int> &s2, set<int> &res)
+{
   res.clear();
-  for (set<int>::iterator it = s1.begin(); it != s1.end(); ++it) {
-    if (s2.find(*it) != s2.end()) {
-      res.insert(*it);
+  for(set<int>::iterator it = s1.begin(); it!= s1.end(); ++it)
+    {
+      if( s2.find(*it) != s2.end() )
+	{
+	  res.insert( *it );
+	}
     }
-  }
 }
 
-void UnionSets(set<int> &sTotal, const set<int> &sToBeAdd) {
-  for (set<int>::iterator it = sToBeAdd.begin(); it != sToBeAdd.end(); ++it) {
-    sTotal.insert(*it);
-  }
+void UnionSets(set<int> &sTotal, const set<int> &sToBeAdd)
+{
+	for( set<int>::iterator it = sToBeAdd.begin(); it!= sToBeAdd.end(); ++it)
+	{
+        sTotal.insert (*it);
+	}
 }
 
 // templates
-void JoinSets(const set<char> &s1, const set<char> &s2, set<char> &res) {
+void JoinSets( const set<char> &s1, const set<char> &s2, set<char> &res)
+{
   res.clear();
-  for (set<char>::iterator it = s1.begin(); it != s1.end(); ++it) {
-    if (s2.find(*it) != s2.end()) {
-      res.insert(*it);
-    }
+  for(set<char>::iterator it = s1.begin(); it!= s1.end(); ++it)
+  {
+      if( s2.find(*it) != s2.end() )
+	{
+	  res.insert( *it );
+	}
   }
 }
-void SubtractSets(set<char> &s1, const set<char> &s2) {
-  if (s2.size() == 0) {
-    return;
-  }
-  set<char> res;
-  // this function performs set intersection, i.e. s1=s1 ^s2
-  for (set<char>::iterator it = s1.begin(); it != s1.end(); ++it) {
-    if (s2.find(*it) == s2.end()) {
-      res.insert(*it);
+void SubtractSets(set<char> &s1, const set<char> &s2)
+{
+    if( s2.size() == 0)
+    {
+        return;
     }
-  }
-  s1.clear();
-  s1 = res;
+    set<char> res;
+	// this function performs set difference, i.e. s1 = s1 - s2 (keeps only elements of s1 not in s2)
+	for( set<char>::iterator it = s1.begin(); it!= s1.end(); ++it)
+	{
+		if( s2.find( *it ) == s2.end() )
+		{
+			res.insert( *it );
+		}
+	}
+    s1.clear();
+    s1 = res;
 }
-void UnionSets(set<char> &sTotal, const set<char> &sToBeAdd) {
-  for (set<char>::iterator it = sToBeAdd.begin(); it != sToBeAdd.end(); ++it) {
-    sTotal.insert(*it);
-  }
+void UnionSets(set<char> &sTotal, const set<char> &sToBeAdd)
+{
+	for( set<char>::iterator it = sToBeAdd.begin(); it!= sToBeAdd.end(); ++it)
+	{
+        sTotal.insert (*it);
+	}
 }
-void DumpSet(const set<char> &s) {
-  cout << "Set contains: ";
-  for (set<char>::iterator it = s.begin(); it != s.end(); ++it) {
-    cout << (int)*it << ",";
-  }
-  cout << endl;
+void DumpSet( const set<char> &s)
+{
+	cout << "Set contains: ";
+	for(set<char> :: iterator it = s.begin(); it != s.end(); ++it)
+	{
+		cout << (int)*it << ",";
+	}
+	cout << endl;
 }
 
-void ConvIntSetToCharSet(const set<int> &si, set<char> &sc) {
-  sc.clear();
-  for (set<int>::iterator it = si.begin(); it != si.end(); ++it) {
-    sc.insert((int)*it);
-  }
+void ConvIntSetToCharSet( const set<int> &si, set<char> &sc )
+{
+	sc.clear();
+	for( set<int> :: iterator it =si.begin(); it != si.end(); ++it )
+	{
+		sc.insert( (int)*it );
+	}
 }
-void ConvCharSetToIntSet(const set<char> &sc, set<int> &si) {
-  si.clear();
-  for (set<char>::iterator it = sc.begin(); it != sc.end(); ++it) {
-    si.insert(*it);
-  }
+void ConvCharSetToIntSet( const set<char> &sc, set<int> &si )
+{
+	si.clear();
+	for( set<char> :: iterator it =sc.begin(); it != sc.end(); ++it )
+	{
+		si.insert( *it );
+	}
 }
 
+
+
 // others
-void RmIntValFromSet(set<int> &s, int v) {
-  for (set<int>::iterator it = s.begin(); it != s.end(); ++it) {
-    if (*it == v) {
-      s.erase(it);
-      return;
-    }
-  }
+void RmIntValFromSet( set<int> &s, int v)
+{
+	for( set<int> ::iterator it = s.begin(); it != s.end(); ++it)
+	{
+		if( *it == v)
+		{
+			s.erase( it );
+			return;
+		}
+	}
 }
 
-void DumpIntSet(const set<int> &incSet) {
-  //#ifdef BG_DEBUG
-  cout << "Set contains: ";
-  for (set<int>::iterator it = incSet.begin(); it != incSet.end(); ++it) {
-    cout << *it << ",";
-  }
-  cout << endl;
-  //#endif
+void DumpIntSet(const set<int> &incSet)
+{
+//#ifdef BG_DEBUG
+	cout << "Set contains: ";
+	for(set<int> :: iterator it = incSet.begin(); it != incSet.end(); ++it)
+	{
+		cout << *it << ",";
+	}
+	cout << endl;
+//#endif
 }
 
-void DumpIntSetNoReturn(const set<int> &incSet) {
-  for (set<int>::iterator it = incSet.begin(); it != incSet.end(); ++it) {
-    cout << *it << ",";
-  }
+void DumpIntSetNoReturn(const set<int> &incSet)
+{
+	for(set<int> :: iterator it = incSet.begin(); it != incSet.end(); ++it)
+	{
+		cout << *it << ",";
+	}
 }
 
-void DumpIntVec(const vector<int> &intVec) {
-  cout << "Vector contains: ";
-  for (int i = 0; i < intVec.size(); ++i) {
-    cout << intVec[i] << ",";
-  }
-  cout << endl;
+void DumpIntVec(const vector<int> &intVec)
+{
+	cout << "Vector contains: ";
+	for(int i=0; i<intVec.size(); ++i)
+	{
+		cout << intVec[i]  << ",";
+	}
+	cout << endl;
+
 }
 
-void PopulateSetByVec(set<int> &dest, const vector<int> &srcVec) {
-  dest.clear();
-  for (int i = 0; i < srcVec.size(); ++i) {
-    dest.insert(srcVec[i]);
-  }
+void PopulateSetByVec( set<int> &dest, const vector<int> &srcVec)
+{
+    dest.clear();
+	for(int i=0; i<srcVec.size(); ++i)
+	{
+		dest.insert( srcVec[i] );
+	}
 }
 
-void PopulateVecBySet(vector<int> &dest, const set<int> &srcSet) {
-  dest.clear();
-  for (set<int>::iterator it = srcSet.begin(); it != srcSet.end(); ++it) {
-    dest.push_back(*it);
-  }
+void PopulateVecBySet( vector<int> &dest, const set<int> &srcSet)
+{
+    dest.clear();
+    for( set<int> :: iterator it = srcSet.begin(); it != srcSet.end(); ++it)
+    {
+        dest.push_back( *it );
+    }
 }
 
-void CopyIntSet(set<int> &dest, const set<int> &src) {
-  dest.clear();
-  for (set<int>::iterator it = src.begin(); it != src.end(); ++it) {
-    dest.insert(*it);
-  }
+
+void CopyIntSet(set<int> & dest, const set<int> &src)
+{
+	dest.clear();
+	for(set<int>::iterator it = src.begin(); it != src.end(); ++it)
+	{
+		dest.insert ( *it );
+	}
+
 }
 
-void CopyIntVec(vector<int> &dest, const vector<int> &src) {
-  dest.clear();
-  for (int i = 0; i < src.size(); ++i) {
-    dest.push_back(src[i]);
-  }
+
+void CopyIntVec(vector<int> & dest, const vector<int> &src)
+{
+	dest.clear();
+	for(int i=0; i<src.size(); ++i)
+	{
+		dest.push_back( src[i] );
+	}
 }
 
-void CopySetIntVec(set<vector<int> > &dest, const set<vector<int> > &src) {
-  dest.clear();
-  for (set<vector<int> >::iterator it = src.begin(); it != src.end(); ++it) {
-    vector<int> v;
-    CopyIntVec(v, *it);
-    dest.insert(v);
-  }
+void CopySetIntVec( set< vector<int> > &dest, const set< vector<int> > &src)
+{
+	dest.clear();
+	for( set< vector<int> > :: iterator it = src.begin(); it != src.end(); ++it)
+	{
+		vector<int> v;
+		CopyIntVec( v,  *it );
+		dest.insert(v);
+	}
 }
 
-bool IsVecSame(const vector<int> &v1, const vector<int> &v2) {
-  if (v1.size() != v2.size()) {
-    return false;
-  }
-  for (int i = 0; i < v1.size(); ++i) {
-    if (v1[i] != v2[i]) {
-      return false;
-    }
-  }
-  return true;
+bool IsVecSame( const vector<int> &v1, const vector<int> &v2)
+{
+	if( v1.size() != v2.size() )
+	{
+		return false;
+	}
+	for(int i=0; i<v1.size(); ++i)
+	{
+		if( v1[i] != v2[i])
+		{
+			return false;
+		}
+	}
+	return true;
 }
 
-bool IsIntVecInSet(const set<vector<int> > &s, const vector<int> &v) {
-  for (set<vector<int> >::iterator it = s.begin(); it != s.end(); ++it) {
-    vector<int> v1 = *it;
-    if (IsVecSame(v, v1) == true) {
-      return true;
-    }
-  }
-  return false;
+bool IsIntVecInSet ( const set< vector<int> > &s, const vector<int> &v)
+{
+	for(set< vector<int> > :: iterator it = s.begin(); it != s.end(); ++it)
+	{
+		vector<int> v1 = *it;
+		if(IsVecSame ( v, v1) == true )
+		{
+			return true;
+		}
+	}
+	return false;
 }
 
 // The following two functions could be used in dynamic programing
 // when, for example, we need to consider all sets
 // One limitation is that it is limited to integer range
 // that up to 32 bits
-void ConvIntToVec(unsigned int val, vector<int> &vec, int numBits) {
-  // we would store the least significant bit as vec[0]
-  vec.clear();
-  if (numBits <= 32) {
-    for (int i = 0; i < numBits; ++i) {
-      if ((val & 0x1) == 0) {
-        vec.push_back(0);
-        //				vec.insert( vec.begin(), 0);
-      } else {
-        vec.push_back(1);
-        //				vec.insert( vec.begin(), 1);
-      }
-      val = val >> 1;
-    }
-  }
+void ConvIntToVec(unsigned int val, vector<int> &vec, int numBits)
+{
+	// we would store the least significant bit as vec[0]
+	vec.clear();
+	if(numBits <= 32)
+	{
+		for(int i=0; i<numBits; ++i)
+		{
+			if( (val & 0x1) == 0)
+			{
+				vec.push_back( 0 );
+//				vec.insert( vec.begin(), 0);
+			}
+			else
+			{
+				vec.push_back( 1 );
+//				vec.insert( vec.begin(), 1);
+			}
+			val = val >> 1;
+		}
+	}
 }
 
-unsigned int ConvVecToInt(const vector<int> &vec) {
-  // assume vec[0] is least siginicant
-  unsigned int res = 0;
+unsigned int ConvVecToInt( const vector<int> &vec)
+{
+	// assume vec[0] is least significant
+	unsigned int res = 0;
 
-  for (int i = vec.size() - 1; i >= 0; --i) {
-    YW_ASSERT_INFO(vec[i] == 0 || vec[i] == 1,
-                   "In ConvVecToInt, vector is not binary.");
-    // cout << "res = " << res << endl;
-    if (vec[i] == 1) {
-      res += 1;
-    }
-    if (i > 0) {
-      res = res << 1;
-    }
-  }
+	for(int i=vec.size()-1; i >= 0; --i)
+	{
+        YW_ASSERT_INFO( vec[i] == 0 || vec[i] == 1, "In ConvVecToInt, vector is not binary."  );
+//cout << "res = " << res << endl;
+		if(vec[i] == 1)
+		{
+			res += 1;
+		}
+		if( i > 0)
+		{
+			res = res << 1;
+		}
+	}
 
-  return res;
+	return res;
 }
 
-void ConvIntToVecMSB(unsigned int val, vector<int> &vec, int numBits) {
-  // we would store the least significant bit as vec[0]
-  YW_ASSERT_INFO(numBits <= 32, "ConvIntToVecMSB :: numBits is too large.");
-  ConvIntToVec(val, vec, numBits);
-  ReverseIntVec(vec);
+void ConvIntToVecMSB(unsigned int val, vector<int> &vec, int numBits)
+{
+	// we store the most significant bit as vec[0]: convert LSB-first, then reverse
+    YW_ASSERT_INFO( numBits <= 32 , "ConvIntToVecMSB :: numBits is too large.");
+    ConvIntToVec(val, vec, numBits);
+    ReverseIntVec(vec);
 }
 
-unsigned int ConvVecToIntMSB(const vector<int> &vec) {
-  vector<int> vecMSB = vec;
-  // cout << "vec = ";
-  // DumpIntVec( vec );
-  ReverseIntVec(vecMSB);
-  // cout << "vec = ";
-  // DumpIntVec( vec );
-  return ConvVecToInt(vecMSB);
+unsigned int ConvVecToIntMSB( const vector<int> &vec)
+{
+    vector<int> vecMSB = vec;
+//cout << "vec = ";
+//DumpIntVec( vec );
+    ReverseIntVec(vecMSB);
+//cout << "vec = ";
+//DumpIntVec( vec );
+    return ConvVecToInt(vecMSB);
 }
 
-void ReverseIntVec(vector<int> &vec) {
-  // cout << "Before switching: vec = ";
-  // DumpIntVec( vec );
-  // This function would reverse the integer vector, i.e. vec[0] = vec[n-1] and
-  // so on
-  for (int i = 0; i < vec.size() / 2; ++i) {
-    int tmp = vec[vec.size() - 1 - i];
-    vec[vec.size() - 1 - i] = vec[i];
-    vec[i] = tmp;
-  }
-  // cout << "After switching: vec = ";
-  // DumpIntVec( vec );
+void ReverseIntVec( vector<int> &vec)
+{
+//cout << "Before switching: vec = ";
+//DumpIntVec( vec );
+    // This function would reverse the integer vector, i.e. vec[0] = vec[n-1] and so on
+    for(int i=0; i<vec.size()/2; ++i)
+    {
+        int tmp = vec[ vec.size()- 1 -i ];
+        vec[ vec.size()- 1 -i ] = vec[i];
+        vec[i] = tmp;
+    }
+//cout << "After switching: vec = ";
+//DumpIntVec( vec );
 }
 
-unsigned int CalcBitInt(int pos, int width) {
-  return 0x1 << (width - 1 - pos);
-  //	return 0x1 << pos;
+
+unsigned int CalcBitInt( int pos, int width )
+{
+	return 0x1 << (width - 1 - pos);
+//	return 0x1 << pos;
 }
 
 // This function update the eumeration index into different set size
 // it works as following: suppose we have 3 sets of size 2,3,2 and
 // initially the index is 0,1,0.
-// Then after calling this function it becomes 0,1,1. And next time, since we
-// reach the set bound, we have to change it to 0,2,0 (remember reseting to 0 at
-// pos 3) This function returns false when we reach the end
-bool GetNextEnumVec(vector<int> &curPos, const vector<int> &limitvec) {
-  if (limitvec.size() != curPos.size()) {
-    return false;
-  }
+// Then after calling this function it becomes 0,1,1. And next time, since we reach the
+// set bound, we have to change it to 0,2,0 (remember resetting to 0 at pos 3)
+// This function returns false when we reach the end
+bool GetNextEnumVec( vector<int>& curPos, const vector<int> &limitvec)
+{
+	if(limitvec.size() != curPos.size() )
+	{
+		return false;
+	}
 
-  // Now we find the last position i in curPos,
-  // s.t. curPos[i] < limitvec[i]
-  int i = -1;
-  for (i = curPos.size() - 1; i >= 0; --i) {
-    if (curPos[i] >= limitvec[i]) {
-      return false;
-    }
+	// Now we find the last position i in curPos,
+	// s.t. curPos[i] < limitvec[i]
+	int i = -1;
+	for(i= curPos.size()-1; i>=0; --i)
+	{
+		if( curPos[i] >= limitvec[i])
+		{
+			return false;
+		}
 
-    if (curPos[i] < limitvec[i] - 1) {
-      break;
-    }
-  }
+		if(curPos[i] < limitvec[i] - 1 )
+		{
+			break;
+		}
+	}
 
-  if (i < 0) {
-    // OK, we can not continue since we have run out of search space
-    return false;
-  }
+	if(i < 0)
+	{
+		// OK, we can not continue since we have run out of search space
+		return false;
+	}
 
-  // Ohterwise, we increment this position and reset the following positions
-  curPos[i] = curPos[i] + 1;
-  for (int j = i + 1; j < curPos.size(); ++j) {
-    curPos[j] = 0;
-  }
-  return true;
+	// Otherwise, we increment this position and reset the following positions
+	curPos[i] = curPos[i] + 1;
+	for(int j=i+1; j<curPos.size(); ++j)
+	{
+		curPos[j] = 0;
+	}
+	return true;
 }
 
-void YW_ASSERT(bool f) {
-  if (f == false) {
-    cout << "Assertion error" << endl;
-    exit(1);
-  }
+void YW_ASSERT(bool f)
+{
+	if( f == false)
+	{
+		cout << "Assertion error" << endl;
+		exit(1);
+	}
 }
 
-void YW_ASSERT_INFO(bool f, const char *info) {
-  //#if 0
-  if (f == false) {
-    cout << "Assertion Error: " << info << endl;
-    ;
-    exit(1);
-  }
-  //#endif
+void YW_ASSERT_INFO( bool f, const char *info)
+{
+//#if 0
+    if( f== false )
+    {
+        cout << "Assertion Error: " << info << endl;;
+        exit(1);
+    }
+//#endif
 }
 
-void RemoveFromIntSet(vector<int> &targetSet, int val) {
-  for (vector<int>::iterator it = targetSet.begin(); it != targetSet.end();
-       ++it) {
-    if (*it == val) {
-      targetSet.erase(it);
-      return;
-    }
-  }
+void RemoveFromIntSet( vector<int> &targetSet, int val )
+{
+	for(vector<int> ::iterator it = targetSet.begin(); it != targetSet.end(); ++it)
+	{
+		if( *it == val)
+		{
+			targetSet.erase( it );
+			return;
+		}
+	}
 }
 
-bool IsIntSetEquiv(const set<vector<int> > &s1, const set<vector<int> > &s2) {
-  if (s1.size() != s2.size()) {
-    return false;
-  }
-  // we check to see if every element in s1 is also in s2
-  for (set<vector<int> >::iterator it = s1.begin(); it != s1.end(); ++it) {
-    if (s2.find(*it) == s2.end()) {
-      return false;
-    }
-  }
-  return true;
+
+
+bool IsIntSetEquiv(const set< vector<int> > &s1, const set< vector<int> >&s2)
+{
+	if( s1.size() != s2.size() )
+	{
+		return false;
+	}
+	// we check to see if every element in s1 is also in s2
+	for( set< vector<int> >:: iterator it = s1.begin(); it != s1.end(); ++it)
+	{
+		if( s2.find( *it) == s2.end() )
+		{
+			return false;
+		}
+	}
+	return true;
 }
 
-void OrderInt(int &i1, int &i2) {
-  // Exchange two number if i1 is greater than i2
-  if (i1 > i2) {
-    int tmp = i2;
-    i2 = i1;
-    i1 = tmp;
-  }
+void OrderInt( int&i1, int &i2)
+{
+	// Exchange two number if i1 is greater than i2
+	if( i1 > i2)
+	{
+		int tmp = i2;
+		i2 = i1;
+		i1 = tmp;
+	}
 }
 
-static int QSortCompare(const void *arg1, const void *arg2) {
-  /* Compare all of both strings: */
-  // assume sorting in accending order
-  int n1 = *((int *)arg1);
-  int n2 = *((int *)arg2);
-  // cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
-  if (n1 > n2) {
-    return 1;
-  } else if (n1 < n2) {
-    return -1;
-  } else {
-    return 0;
-  }
+static int QSortCompare( const void *arg1, const void *arg2 )
+{
+   /* Compare the two ints pointed to by arg1 and arg2: */
+    // assume sorting in ascending order
+    int n1 = *((int *) arg1);
+    int n2 = *((int *) arg2);
+//cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
+    if( n1 > n2)
+    {
+        return 1;
+    }
+    else if( n1 < n2)
+    {
+        return -1;
+    }
+    else
+    {
+        return 0;
+    }
 }
 
-void SortIntVec(vector<int> &vec, int start, int end) {
-  //#if 0
-  if (vec.size() == 0) {
-    // do nothing
-    return;
-  }
-  if (end < 0) {
-    end = vec.size() - 1;
-  }
-  int sortLen = end - start + 1;
-  int *array = new int[sortLen];
-  for (int i = start; i <= end; ++i) {
-    array[i - start] = vec[i];
-  }
-  qsort((void *)array, sortLen, sizeof(int), QSortCompare);
-  // Now write back
-  for (int i = start; i <= end; ++i) {
-    vec[i] = array[i - start];
-  }
 
-  delete[] array;
+void SortIntVec( vector<int> &vec, int start, int end)
+{
+//#if 0
+    if( vec.size() == 0)
+    {
+        // do nothing
+        return;
+    }
+	if (end < 0 )
+	{
+		end = vec.size() - 1;
+	}
+    int sortLen = end - start +1;
+    int *array = new int[sortLen];
+    for(int i=start; i<= end; ++i)
+    {
+        array[i-start] = vec[i];
+    }
+    qsort( (void *)array, sortLen, sizeof( int ), QSortCompare );
+    // Now write back
+    for(int i=start; i<=end; ++i)
+    {
+        vec[i] = array[i-start];
+    }
+
+    delete [] array;
 //#endif
 #if 0
 	// Sort the vector, by the most obvious method
@@ -430,17 +535,21 @@ void SortIntVec(vector<int> &vec, int start, int end) {
 #endif
 }
 
-void GetFirstCombo(int k, int n, vector<int> &posvec) {
-  posvec.clear();
-  for (int i = 0; i < k; ++i) {
-    posvec.push_back(i);
-  }
+
+void GetFirstCombo(int k, int n, vector<int>& posvec)
+{
+	posvec.clear();
+	for(int i=0; i<k; ++i)
+	{
+		posvec.push_back(i);
+	}
 }
 
-bool GetNextCombo(int k, int n, vector<int> &posvec) {
-  // The idea is to move the rightmost (movable) value to the right
-  int startpos = k - 1;
-  return GetNextComboFrom(k, n, posvec, startpos);
+bool GetNextCombo(int k, int n, vector<int>& posvec)
+{
+	// The idea is to move the rightmost (movable) value to the right
+	int startpos = k-1;
+    return GetNextComboFrom( k, n, posvec, startpos);
 #if 0
 	while(pos >= 0)
 	{
@@ -462,137 +571,167 @@ bool GetNextCombo(int k, int n, vector<int> &posvec) {
 #endif
 }
 
-bool GetNextComboFrom(int k, int n, vector<int> &posvec, int startpos) {
-  // This function differs from the previous one in that it starts moving
-  // forward
-  // not neccessary from pos=k-1, but from the given startpos
-  // this allows flexibility of bypassing searching when that space will not be
-  // productive
-  int pos = startpos;
-  while (pos >= 0) {
-    if (posvec[pos] < pos + (n - k)) {
-      posvec[pos] = posvec[pos] + 1;
-      for (int i = pos + 1; i < k; ++i) {
-        posvec[i] = i + (posvec[pos] - pos);
-      }
-      return true;
-    } else {
-      pos--;
-    }
-  }
-  return false;
-}
-
-#if 0
-int ConvComboToIndex(int numCells, const vector<int> &posvec)
+bool GetNextComboFrom(int k, int n, vector<int> &posvec, int startpos)
 {
-	// This function converts a position vector into an index
-	// This is useful when performing dynamic programming
-	// The idea is to check each position in vector
-	// For i-th position, if it is greater than i, then
-	// plus C(n-posvec[i], k-i).
-	int res = 0;
-
-	return res;
-}
-#endif
-
-double GetRandFraction() {
-  // Now we try random method. Flip a coin, to decide whether to take this new
-  // choice
-  static bool isRandInit = false;
-  if (isRandInit == false) {
-    srand((unsigned)time(NULL));
-    isRandInit = true;
-  }
+	// This function differs from the previous one in that it starts moving forward
+    // not necessarily from pos=k-1, but from the given startpos
+    // this allows flexibility of bypassing searching when that space will not be productive
+	int pos = startpos;
+	while(pos >= 0)
+	{
+		if( posvec[pos] < pos + (n-k) )
+		{
+			posvec[pos] = posvec[pos] + 1;
+			for(int i=pos+1; i<k; ++i)
+			{
+				posvec[i] = i + (posvec[pos] - pos);
+			}
+			return true;
+		}
+		else
+		{
+			pos --;
+		}
+	}
+	return false;
+}
+
+
+#if 0
+int ConvComboToIndex(int numCells, const vector<int> &posvec)
+{
+	// This function converts a position vector into an index
+	// This is useful when performing dynamic programming
+	// The idea is to check each position in vector
+	// For i-th position, if it is greater than i, then
+	// plus C(n-posvec[i], k-i).
+	int res = 0;
 
-  double c = (double)(rand()) / RAND_MAX;
-  return c;
+	return res;
 }
+#endif
 
-void GetBoolVec(int num, const vector<int> &posvec, vector<bool> &bvec) {
-  int pos = 0;
-  for (int i = 0; i < num; ++i) {
+double GetRandFraction()
+{
+	// Now we try random method. Flip a coin, to decide whether to take this new choice
+	static bool isRandInit = false;
+	if(isRandInit == false)
+	{
+		 srand( (unsigned)time( NULL ) );
+		isRandInit = true;
+	}
 
-    if (pos >= posvec.size() || i < posvec[pos]) {
-      bvec.push_back(false);
-    } else if (i == posvec[pos]) {
-      bvec.push_back(true);
-      pos++;
-    } else {
-      YW_ASSERT_INFO(false, "GetBoolVec");
-    }
-  }
+	double c = (double) ( rand() ) / RAND_MAX;
+	return c;
 }
 
-void GetIntVec(int num, const vector<int> &posvec, vector<int> &bvec) {
-  bvec.clear();
-  int pos = 0;
-  for (int i = 0; i < num; ++i) {
+void GetBoolVec(int num, const vector<int> &posvec, vector<bool>& bvec)
+{
+	int pos = 0;
+	for(int i=0; i<num; ++i)
+	{
 
-    if (pos >= posvec.size() || i < posvec[pos]) {
-      bvec.push_back(0);
-    } else if (i == posvec[pos]) {
-      bvec.push_back(1);
-      pos++;
-    } else {
-      YW_ASSERT_INFO(false, "GetIntVec");
-    }
-  }
+		if(pos >= posvec.size() ||  i < posvec[pos] )
+		{
+			bvec.push_back( false );
+		}
+		else if( i == posvec[pos] )
+		{
+			bvec.push_back( true );
+			pos++;
+		}
+		else
+		{
+			YW_ASSERT_INFO(false, "GetBoolVec");
+		}
+	}
+}
+
+void GetIntVec(int num, const vector<int> &posvec, vector<int>& bvec)
+{
+	bvec.clear();
+	int pos = 0;
+	for(int i=0; i<num; ++i)
+	{
+
+		if(pos >= posvec.size() ||  i < posvec[pos] )
+		{
+			bvec.push_back( 0 );
+		}
+		else if( i == posvec[pos] )
+		{
+			bvec.push_back( 1 );
+			pos++;
+		}
+		else
+		{
+			YW_ASSERT_INFO(false, "GetIntVec");
+		}
+	}
 }
 
 // Coomposite bound operations
-int CalcCompositeBound(map<INTERVAL, int> &mapIntervalBds, int left, int right,
-                       vector<int> &locBreakpoints) {
-  // This method outputs a composite bound for the given interval
-  int res = 0;
-
-  int lenInterval = right - left + 1;
-  vector<int> lbHelper;
-  // Initialize our lb helper data
-  for (int i = 0; i < lenInterval; ++i) {
-    lbHelper.push_back(0);
-  }
+int CalcCompositeBound(  map<INTERVAL, int>& mapIntervalBds, int left, int right, vector<int> &locBreakpoints )
+{
+	// This method outputs a composite bound for the given interval
+	int res = 0;
 
-  // Now we scan through all the initervals in range (from 'left' to 'right')
-  // we also need to make sure we start by sotring interval based on its right
-  // end
-  for (int re = left + 1; re <= right; ++re) {
-    for (int le = left; le < re; ++le) {
-      // we now consider the interval [le, re]
-      INTERVAL iv(le, re);
-      if (mapIntervalBds.find(iv) == mapIntervalBds.end()) {
-        // nothing needs to be done, if interval is not in map
-        continue;
-      }
-      int valInt = mapIntervalBds[iv];
-
-      // we now figure out lbHelper value based on the value
-      int lbSofar = 0;
-      for (int i = le; i < re; ++i) {
-        lbSofar += lbHelper[i - left];
-      }
-      if (lbSofar < valInt) {
-        // we make up the diff in the last slot
-        lbHelper[re - left - 1] += valInt - lbSofar;
-      }
-    }
-  }
+	int lenInterval = right-left+1;
+	vector<int> lbHelper;
+	// Initialize our lb helper data
+	for(int i=0; i<lenInterval; ++i)
+	{
+		lbHelper.push_back( 0 );
+	}
 
-  // Finally, we tally the result
-  for (int i = 0; i < lenInterval; ++i) {
-    if (lbHelper[i] != 0) {
-      for (int j = 0; j < lbHelper[i]; ++j) {
-        locBreakpoints.push_back(i + left);
-      }
-#if 0
+	// Now we scan through all the intervals in range (from 'left' to 'right')
+	// we also need to make sure we start by sorting interval based on its right end
+	for(int re = left+1; re<=right; ++re)
+	{
+		for(int le = left; le <re; ++le)
+		{
+			// we now consider the interval [le, re]
+			INTERVAL iv(le, re);
+			if( mapIntervalBds.find( iv ) == mapIntervalBds.end() )
+			{
+				// nothing needs to be done, if interval is not in map
+				continue;
+			}
+			int valInt = mapIntervalBds[ iv ];
+
+			// we now figure out lbHelper value based on the value
+			int lbSofar = 0;
+			for(int i=le; i < re; ++i)
+			{
+				lbSofar += lbHelper[  i-left  ];
+			}
+			if( lbSofar < valInt)
+			{
+				// we make up the diff in the last slot
+				lbHelper[ re - left -1] += valInt - lbSofar;
+			}
+		}
+	}
+
+	// Finally, we tally the result
+	for(int i=0; i<lenInterval; ++i)
+	{
+        if( lbHelper[i] != 0)
+        {
+            for(int j=0; j<lbHelper[i]; ++j)
+            {
+                locBreakpoints.push_back( i+left );
+            }
+        #if 0
           cout << "Between site " << i+1 << " and site " << i+2 << ", there are " << lbHelper[i]  << " recombs." << endl;
-#endif
-    }
-    res += lbHelper[i];
-  }
+        #endif
+        }
+		res += lbHelper[i];
+	}
+
+
+	return res;
 
-  return res;
 
 #if 0
 	// This method outputs a composite bound for the given interval
@@ -655,90 +794,107 @@ int CalcCompositeBound(map<INTERVAL, int> &mapIntervalBds, int left, int right,
 #endif
 }
 
-void OutputBounds(char *boundsFileName, map<INTERVAL, int> &mapIntervalBds,
-                  int nSites) {
-  // This function outputs results (that are stored inside a map)
-  // First open a file as named as passed in
-  // Now open file to write out
-  //    char fname[1024];
-  //    strcpy( fname, boundsFileName );
-  ofstream outFile(boundsFileName);
-  if (outFile.is_open() == false) {
-    cout << "Can not open output file: " << boundsFileName << endl;
-    return;
-  }
+void  OutputBounds(char *boundsFileName, map<INTERVAL, int>& mapIntervalBds, int nSites)
+{
+	// This function outputs results (that are stored inside a map)
+	// First open a file as named as passed in
+	// Now open file to write out
+//    char fname[1024];
+//    strcpy( fname, boundsFileName );
+	ofstream outFile(  boundsFileName  );
+	if(outFile.is_open() == false)
+	{
+		cout << "Can not open output file: "<< boundsFileName <<  endl;
+		return;
+	}
 
-  outFile << "bounds-from-HapBound\n";
-  for (int i = 0; i < nSites - 1; ++i) {
-    for (int j = i + 1; j < nSites; ++j) {
-      INTERVAL iv(i, j);
-      if (mapIntervalBds.find(iv) != mapIntervalBds.end()) {
-        outFile << i + 1 << "  " << j + 1 << "  " << mapIntervalBds[iv] << endl;
-      } else {
-        cout << "Warning: interval not complete. Missing (" << i << ", " << j
-             << ")" << endl;
-      }
-    }
-  }
-  outFile.close();
+
+	outFile << "bounds-from-HapBound\n";
+	for(int i=0; i<nSites-1; ++i)
+	{
+		for(int j=i+1; j<nSites; ++j)
+		{
+			INTERVAL iv(i, j);
+			if(mapIntervalBds.find(iv) != mapIntervalBds.end() )
+			{
+				outFile << i+1 << "  " << j+1 << "  " << mapIntervalBds[iv] << endl;
+			}
+			else
+			{
+				cout << "Warning: interval not complete. Missing (" << i <<", " << j << ")" << endl;
+			}
+		}
+	}
+	outFile.close();
 }
 
 // Some combinatorial tricks
-void InitPermutation(vector<int> &nvec, const vector<int> &reference) {
-  // We ASSUME reference is already sorted
-  nvec = reference;
-  //	SortIntVec( nvec );
-}
-
-bool GetNextPermutation(vector<int> &nvec, const vector<int> &reference) {
-  // Now, we try to find the next position
-  // The idea is to start from the right, and check if we can use it as
-  // the starting location
-  for (int i = nvec.size() - 1; i >= 0; --i) {
-    // Make sure this number is not already maximum
-    if (nvec[i] == reference[reference.size() - 1]) {
-      continue;
-    }
-    // Now, we make sure there is at least one element to the right of it
-    int minLarger = HAP_MAX_INT;
-    int pos = -1;
-    for (int j = i + 1; j < nvec.size(); ++j) {
-      if (nvec[j] > nvec[i] && minLarger > nvec[j]) {
-        pos = j;
-        minLarger = nvec[j];
-      }
-    }
+void InitPermutation( vector<int> &nvec, const vector<int> &reference)
+{
+	// We ASSUME reference is already sorted
+	nvec = reference;
+//	SortIntVec( nvec );
+}
 
-    // If no such j is found, stop
-    if (pos < 0) {
-      continue;
-    }
 
-    // Otherwise, we stop here by taking this position
-    nvec[pos] = nvec[i];
-    nvec[i] = minLarger;
 
-    SortIntVec(nvec, i + 1, nvec.size() - 1);
-    return true;
-  }
+bool GetNextPermutation(vector<int> &nvec, const vector<int> &reference )
+{
+	// Now, we try to find the next position
+	// The idea is to start from the right, and check if we can use it as
+	// the starting location
+	for(int i=nvec.size()-1; i>=0; --i)
+	{
+		// Make sure this number is not already maximum
+		if( nvec[i] == reference[reference.size()-1] )
+		{
+			continue;
+		}
+		// Now, we make sure there is at least one element to the right of it
+		int minLarger = HAP_MAX_INT;
+		int pos = -1;
+		for(int j=i+1; j<nvec.size(); ++j)
+		{
+			if( nvec[j] > nvec[i] && minLarger > nvec[j] )
+			{
+				pos = j;
+				minLarger = nvec[j];
+			}
+		}
 
-  return false;
+		// If no such j is found, skip this position and try the next one to the left
+		if( pos < 0 )
+		{
+			continue;
+		}
+
+		// Otherwise, we stop here by taking this position
+		nvec[pos] = nvec[i];
+		nvec[i] = minLarger;
+
+		SortIntVec(nvec, i+1, nvec.size() - 1 );
+		return true;
+	}
+
+	return false;
 }
 
-int ConvertToLinear(int r1, int r2, int nRows) {
-  int n = nRows;
-  int n1 = (r1 + 1) * (n - 1) - ((r1 + 1) * r1) / 2;
-  return n1 - (n - r2);
+int ConvertToLinear(int r1, int r2, int nRows)
+{
+	int n = nRows;
+	int n1 = (r1+1)*(n-1) - ((r1+1)*r1)/2;
+	return  n1 - (n - r2) ;
 }
 
-int ConvertToLinearEq(int r1, int r2, int nRows) {
-  // The only difference from the above is this one allow r1= r2
-  int n = nRows;
-  int n1 = (r1 + 1) * (n - 1) - ((r1 + 1) * r1) / 2;
-  return n1 - (n - r2);
+int ConvertToLinearEq(int r1, int r2, int nRows)
+{
+    // The only difference from the above is this one allow r1= r2
+	int n = nRows;
+	int n1 = (r1+1)*(n-1) - ((r1+1)*r1)/2;
+	return  n1 - (n - r2) ;
 }
 
-// void ConvertLinearToTwoIndices( int idLinear, int nRows, int &r1, int &r2 )
+//void ConvertLinearToTwoIndices( int idLinear, int nRows, int &r1, int &r2 )
 //{
 //}
 
@@ -746,353 +902,429 @@ int ConvertToLinearEq(int r1, int r2, int nRows) {
 // Recombination/Mutation utilities
 //****************************************************************************************
 
-bool IsMissingValueBit(int bit) { return bit == MISSING_VALUE_BIT; }
+bool IsMissingValueBit( int bit )   // true when bit equals the MISSING_VALUE_BIT marker
+{
+    return bit == MISSING_VALUE_BIT;
+}
 
-int GetMissingValueBit() { return MISSING_VALUE_BIT; }
+int GetMissingValueBit( )   // accessor for the MISSING_VALUE_BIT marker
+{
+	return MISSING_VALUE_BIT;
+}
 
-bool IsTwoStatesCompatible(int bit1, int bit2) {
-  // we say two states are compatible if either they match exactly or one of
-  // them is missing value
-  return (bit1 == bit2) || IsMissingValueBit(bit1) || IsMissingValueBit(bit2);
+bool IsTwoStatesCompatible(int bit1, int bit2)
+{
+    // Two states are compatible if they match exactly or if either one is the missing value
+    return (bit1 == bit2) ||  IsMissingValueBit(bit1) ||  IsMissingValueBit(bit2);
 }
-void FillVecWithMV(SEQUENCE &seq, int len) {
-  // note, do not clear up original seq
-  for (int i = 0; i < len; ++i) {
-    seq.push_back(MISSING_VALUE_BIT);
-  }
+void FillVecWithMV( SEQUENCE &seq, int len)
+{
+	// Append len missing-value entries; the existing contents of seq are not cleared
+	for(int i=0; i<len; ++i)
+	{
+		seq.push_back( MISSING_VALUE_BIT );
+	}
 }
 
-bool IsSeqHasMV(const SEQUENCE &seq) {
-  for (int i = 0; i < (int)seq.size(); ++i) {
-    if (IsMissingValueBit(seq[i]) == true) {
-      return true;
+bool IsSeqHasMV( const SEQUENCE &seq )   // true if any position of seq holds the missing value
+{
+    for( int i=0; i<(int)seq.size(); ++i )
+    {
+        if(IsMissingValueBit( seq[i] ) == true )
+        {
+            return true;
+        }
     }
-  }
-  return false;
+    return false;
 }
-bool AreTwoSeqsCompatible(const SEQUENCE &seq1, const SEQUENCE &seq2) {
-  if (seq1.size() != seq2.size()) {
-    return false; // size must match
-  }
-  for (int i = 0; i < (int)seq1.size(); ++i) {
-    if (IsTwoStatesCompatible(seq1[i], seq2[i]) == false) {
-      return false;
+bool AreTwoSeqsCompatible( const SEQUENCE &seq1, const SEQUENCE &seq2)   // true if equal length and compatible at every position
+{
+    if( seq1.size() != seq2.size() )
+    {
+        return false;   // sequences of different length are never compatible
     }
-  }
-  return true;
-}
-void GetCompatibleSeqForTwo(const SEQUENCE &seq1, const SEQUENCE &seq2,
-                            SEQUENCE &consensus) {
-  YW_ASSERT_INFO(seq1.size() == seq2.size(), "Size mismatch");
-
-  consensus.clear();
-  for (int i = 0; i < (int)seq1.size(); ++i) {
-    YW_ASSERT_INFO(IsTwoStatesCompatible(seq1[i], seq2[i]),
-                   "Can not form compatible");
-    if (IsMissingValueBit(seq1[i]) == false) {
-      consensus.push_back(seq1[i]);
-    } else {
-      consensus.push_back(seq2[i]);
+    for(int i=0; i<(int) seq1.size(); ++i)
+    {
+        if( IsTwoStatesCompatible( seq1[i], seq2[i] ) == false )
+        {
+            return false;   // first incompatible position decides the answer
+        }
     }
-  }
+    return true;
 }
+void GetCompatibleSeqForTwo( const SEQUENCE &seq1, const SEQUENCE &seq2, SEQUENCE &consensus)   // merge two compatible sequences into consensus
+{
+    YW_ASSERT_INFO( seq1.size() == seq2.size(), "Size mismatch" );
 
-extern void DumpSequence(const SEQUENCE &seq);
-void MutateSeqAtSite(SEQUENCE &seq, int site) {
-  // cout << "MutateSeqAtSite: seq = ";
-  // DumpSequence(seq);
-  // cout<< "site = " << site << endl;
-  YW_ASSERT_INFO(IsMissingValueBit(seq[site]) == false,
-                 "Can not mutate a missing value");
-  if (seq[site] == 0) {
-    seq[site] = 1;
-  } else {
-    seq[site] = 0;
-  }
+    consensus.clear();   // any previous contents of consensus are discarded
+    for( int i=0; i<(int) seq1.size(); ++i)
+    {
+        YW_ASSERT_INFO(IsTwoStatesCompatible(seq1[i], seq2[i] ), "Can not form compatible");
+        if( IsMissingValueBit( seq1[i] ) == false  )
+        {
+            consensus.push_back(seq1[i]);   // seq1 wins whenever it has a real value
+        }
+        else
+        {
+            consensus.push_back(seq2[i]);   // seq1 is missing here, so take seq2 (which may be missing too)
+        }
+
+    }
 }
 
-void RecombSequencesAt(const SEQUENCE &s1, const SEQUENCE &s2, int brPt,
-                       SEQUENCE &sr) {
-  // NOTE, ordering is important here. You may need to call this function
-  // twice, if you want to consider recombinatino from both direction
-  // This function assume the first part of s1 is taken first
-  // Another thing to note is 0 <= brPt <= s1.size()-2
-  sr.clear();
-  for (int i = 0; i <= brPt; ++i) {
-    sr.push_back(s1[i]);
-  }
-  for (int i = brPt + 1; i < s2.size(); ++i) {
-    sr.push_back(s2[i]);
-  }
+extern void DumpSequence( const SEQUENCE& seq );
+void MutateSeqAtSite(SEQUENCE &seq, int site)   // flip the state stored at seq[site] in place
+{
+//cout << "MutateSeqAtSite: seq = ";
+//DumpSequence(seq);
+//cout<< "site = " << site << endl;
+    YW_ASSERT_INFO( IsMissingValueBit( seq[site] ) == false, "Can not mutate a missing value"  );
+	if( seq[site] == 0)
+	{
+		seq[site] = 1;
+	}
+	else   // every non-zero, non-missing value is reset to 0
+	{
+		seq[site] = 0;
+	}
 }
 
-bool IsSeqRecombinnable(const SEQUENCE &s1, const SEQUENCE &s2,
-                        const SEQUENCE &st) {
-  // note here, we do not differenceitate left or right
-  INTERVAL iv;
-  return IsSeqRecombinnableIV(s1, s2, st, iv) ||
-         IsSeqRecombinnableIV(s2, s1, st, iv);
-}
-
-bool IsSeqRecombinnableIV(const SEQUENCE &sleft, const SEQUENCE &sright,
-                          const SEQUENCE &st, INTERVAL &iv) {
-  // here assume s1 is left, and s2 is right. THIS IS IMPORTANT
-  // Here, iv returns the interval where the breakpoiint can fall
-  //    cout <<"s1.size = " << s1.size() << ", s2.size = " << s2.size() << ",
-  //    st.size = " << st.size() << endl;
-  YW_ASSERT((sleft.size() == sright.size()) && (sleft.size() == sright.size()));
-
-  // Now we exam the recombination of s1 and s2, into st
-  // first, we find the first location that does not match
-  int pos = 0;
-  while (pos < (int)sleft.size() &&
-         IsTwoStatesCompatible(sleft[pos], st[pos]) == true) {
-    pos++; // continue since they all match
-  }
-  // cout << "1. pos = " << pos << endl;
-  if (pos == (int)sleft.size()) {
-    iv.first = 0;
-    iv.second = (int)sleft.size() - 1;
-    return true;
-  }
+void RecombSequencesAt(const SEQUENCE &s1, const SEQUENCE &s2, int brPt, SEQUENCE &sr)
+{
+	// NOTE: ordering is important here. You may need to call this function
+	// twice if you want to consider recombination from both directions.
+	// sr receives s1[0..brPt] followed by s2[brPt+1..end]; sr is cleared first.
+	// The expected range is 0 <= brPt <= s1.size()-2 (not checked here)
+	sr.clear();
+	for(int i=0; i<=brPt; ++i)
+	{
+		sr.push_back( s1[i] );
+	}
+	for(int i=brPt+1; i < s2.size(); ++i)
+	{
+		sr.push_back( s2[i] );
+	}
+}
 
-  // If there is no matching prefix, there is no solution
-  if (pos == 0) {
-    return false;
-  }
+bool IsSeqRecombinnable(const SEQUENCE & s1, const SEQUENCE & s2, const SEQUENCE & st)
+{
+    // We do not differentiate left from right here: both orderings of s1/s2 are tried
+    INTERVAL iv;   // breakpoint interval from the helper; discarded by this wrapper
+    return IsSeqRecombinnableIV( s1, s2, st, iv) || IsSeqRecombinnableIV( s2, s1, st, iv) ;
+}
 
-  // cout << "2. pos = " << pos << endl;
-
-  // If reach here, we are at the second difference, (the break point)
-  // Now the iv should be set to the maximal range where two sequence match
-  iv.first = pos - 1;
-  iv.second = pos - 1;
-  for (int i = pos - 1; i >= 0; --i) {
-    if (IsTwoStatesCompatible(sleft[i], sright[i]) == true) {
-      iv.first--;
-    } else {
-      break;
+bool IsSeqRecombinnableIV(const SEQUENCE & sleft, const SEQUENCE & sright, const SEQUENCE & st, INTERVAL &iv)
+{
+    // Test whether st is a prefix of sleft followed by a suffix of sright. ORDER IS IMPORTANT
+    // On success, iv returns the interval where the breakpoint can fall
+//    cout <<"s1.size = " << s1.size() << ", s2.size = " << s2.size() << ", st.size = " << st.size() << endl;
+    YW_ASSERT( (sleft.size() == sright.size() ) && (sleft.size() == st.size() ) );
+
+    // Now we examine the recombination of sleft and sright into st
+	// first, we find the first location where sleft and st do not match
+	int pos = 0;
+	while(pos <  (int)sleft.size() &&   IsTwoStatesCompatible( sleft[ pos ], st[ pos ] ) == true   )
+	{
+		pos ++;		// continue since they all match
+	}
+//cout << "1. pos = " << pos << endl;
+	if( pos == (int)sleft.size() )
+	{
+        iv.first = 0;   // st is compatible with all of sleft: any breakpoint works
+        iv.second = (int)sleft.size()-1;
+		return true;
+	}
+
+    // If there is no matching prefix, there is no solution
+    if( pos == 0 )
+    {
+        return false;
     }
-  }
 
-  // cout << "3. pos = " << pos << endl;
+//cout << "2. pos = " << pos << endl;
 
-  while (pos < (int)sleft.size() &&
-         IsTwoStatesCompatible(sright[pos], st[pos]) == true) {
-    pos++;
-  }
-  // cout << "4. pos = " << pos << endl;
+	// If we reach here, pos is the first mismatch with sleft (the break point)
+    // Now iv is widened leftwards over the range where sleft and sright match
+    iv.first = pos-1;
+    iv.second = pos-1;
+    for(int i=pos-1; i >=0; --i)
+    {
+        if( IsTwoStatesCompatible(sleft[i], sright[i]) == true )
+        {
+            iv.first --;   // NOTE(review): reaches -1 if every position down to 0 matches -- confirm callers expect that
+        }
+        else
+        {
+            break;
+        }
+    }
+
+//cout << "3. pos = " << pos << endl;
+
+	while( pos < (int)sleft.size() && IsTwoStatesCompatible( sright[ pos ], st[ pos ] ) == true )
+	{
+		pos ++;
+	}
+//cout << "4. pos = " << pos << endl;
+
+	if( pos == (int)sleft.size() )
+	{
+		return true;
+	}
+	else
+	{
+		return false;
+	}
 
-  if (pos == (int)sleft.size()) {
-    return true;
-  } else {
-    return false;
-  }
 }
 
-void AddUniqueSeqToVec(const SEQUENCE &seq, vector<SEQUENCE> &vecSeqs) {
-  for (int i = 0; i < vecSeqs.size(); ++i) {
-    if (vecSeqs[i] == seq) {
-      return; // Duplicate here
+
+void AddUniqueSeqToVec( const SEQUENCE &seq, vector<SEQUENCE> &vecSeqs)   // append seq unless an equal sequence is already stored
+{
+  for(int i=0; i<vecSeqs.size(); ++i)
+    {
+      if( vecSeqs[i] == seq)
+	{
+	  return;  // Duplicate found: leave vecSeqs unchanged
+	}
     }
-  }
-  vecSeqs.push_back(seq);
+  vecSeqs.push_back( seq );
 }
 
-bool IsSeqInVec(const SEQUENCE &seq, const vector<SEQUENCE> &vecSeqs) {
-  for (int i = 0; i < vecSeqs.size(); ++i) {
-    if (vecSeqs[i] == seq) {
-      return true; // Duplicate here
+bool IsSeqInVec( const SEQUENCE &seq, const vector<SEQUENCE> &vecSeqs)   // linear search for an element equal to seq
+{
+  for(int i=0; i<vecSeqs.size(); ++i)
+    {
+      if( vecSeqs[i] == seq)
+	{
+	  return true;  // Found an equal sequence
+	}
     }
-  }
   return false;
 }
 
-bool IsSeqInSet(const SEQUENCE &seq, const set<SEQUENCE> &setSeqs) {
-  for (set<SEQUENCE>::iterator it = setSeqs.begin(); it != setSeqs.end();
-       ++it) {
-    if (*it == seq) {
-      return true; // Duplicate here
+bool IsSeqInSet( const SEQUENCE &seq, const set<SEQUENCE> &setSeqs)   // true if setSeqs holds an element equal to seq
+{
+    for(  set<SEQUENCE> :: iterator it = setSeqs.begin(); it != setSeqs.end(); ++it   )   // walks the whole set rather than calling find()
+    {
+      if( *it == seq)
+	    {
+	      return true;  // Found an equal sequence
+	    }
     }
-  }
   return false;
 }
 
-void GetEqualSubseq(const SEQUENCE &seq1, const SEQUENCE &seq2, int seedPos,
-                    int &left, int &right) {
-  // This function gets the best matching regions between seq1/se2 around the
-  // seed Note, this function does not include comparision with the seedPos
-  // IMPORTANT!!!! That is, it EXCLUDES seedPos
-  if (seedPos < 0 || seedPos >= seq1.size()) {
-    left = right = -1;
-    return;
-  }
-  left = right = seedPos;
 
-  // Now start checking
-  int i;
-  for (i = seedPos - 1; i >= 0; i--) {
-    if (IsTwoStatesCompatible(seq1[i], seq2[i]) == false) {
-      break;
-    }
-  }
-  if (i >= 0) {
-    left = i;
-  }
-  for (i = seedPos + 1; i < seq1.size(); ++i) {
-    if (IsTwoStatesCompatible(seq1[i], seq2[i]) == false) {
-      break;
-    }
-  }
-  if (i < seq1.size()) {
-    right = i;
-  } else {
-    right = seq1.size() - 1;
-  }
+void GetEqualSubseq(const SEQUENCE &seq1, const SEQUENCE &seq2, int seedPos, int &left, int &right)
+{
+	// This function gets the best matching region between seq1/seq2 around the seed
+	// Note, this function does not include a comparison at the seedPos itself
+	// IMPORTANT!!!! That is, it EXCLUDES seedPos
+	if(seedPos < 0 || seedPos >= seq1.size() )
+	{
+		left = right = -1;   // seed outside seq1: report -1 for both ends
+		return;
+	}
+	left = right = seedPos;
+
+	// Scan leftwards from the seed for the first incompatible position
+	int i;
+	for(i=seedPos-1; i>=0; i--)
+	{
+		if( IsTwoStatesCompatible( seq1[i] , seq2[i]) == false  )
+		{
+			break;
+		}
+	}
+	if( i >= 0)   // NOTE(review): with no mismatch on the left, left stays at seedPos (not 0) -- confirm intended
+	{
+		left = i;
+	}
+	for(i=seedPos+1; i<seq1.size(); ++i)
+	{
+		if( IsTwoStatesCompatible(seq1[i], seq2[i] ) == false )
+		{
+			break;
+		}
+	}
+	if( i< seq1.size() )
+	{
+		right = i;   // index of the first incompatible position on the right
+	}
+	else
+	{
+		right = seq1.size() - 1;   // no mismatch on the right: use the last index
+	}
 }
 
 // Compute the segments in the region of [left, right]
-int CompareSegments(const SEQUENCE &seq, const SEQUENCE &targetSeq, int left,
-                    int right) {
-  int res = 0;
-  // int numGap = 0;						// do we consider
-  // gap here?
-  for (int i = left; i <= right; ++i) {
-    if (IsTwoStatesCompatible(seq[i], targetSeq[i]) == true) {
-      res++;
-    }
-  }
-  return res;
-}
-
-int IsSeqsMutPair(const SEQUENCE &seq1, const SEQUENCE &seq2) {
-  // This function test if seq1/seq2 mutates at some site
-  // If not, return -1. Otherwise, return the site that they differ
-  // note that when there is missing data, we assume two compatible vals
-  // do not form muatant pair
-  int res = -1;
-  for (int i = 0; i < seq1.size(); ++i) {
-    if (IsTwoStatesCompatible(seq1[i], seq2[i]) == false) {
-      if (res < 0) {
-        res = i;
-      } else {
-        // We have seen one difference before, not mutation pair
-        return -1;
-      }
-    }
-  }
-  return res;
+int  CompareSegments(const SEQUENCE &  seq, const SEQUENCE &targetSeq, int left, int right   )   // count compatible positions in [left, right], both ends inclusive
+{
+	int res = 0;
+	//int numGap = 0;						// do we consider gap here?
+	for(int i=left; i <=right; ++i)
+	{
+		if(  IsTwoStatesCompatible( seq[i],  targetSeq[i]) == true )
+		{
+			res ++;
+		}
+	}
+	return res;
 }
 
-int CalcSequencesDistance(const SEQUENCE &seq1, const SEQUENCE &seq2) {
-  // similarly, when there is missing data, assume they can be fit to the best
-  int res = 0;
-  for (int i = 0; i < seq1.size(); ++i) {
-    if (IsTwoStatesCompatible(seq1[i], seq2[i]) == false) {
-      res++;
-    }
-  }
-  return res;
+int IsSeqsMutPair( const SEQUENCE &seq1, const SEQUENCE &seq2)
+{
+	// This function tests whether seq1/seq2 differ at exactly one site
+	// If so, return that site; otherwise (no or several differences) return -1
+    // Note that when there is missing data, two compatible values
+    // do not count as a difference, so they never form a mutant pair
+	int res = -1;
+	for(int i=0; i<seq1.size(); ++i)
+	{
+		if( IsTwoStatesCompatible( seq1[i], seq2[i] ) == false)
+		{
+			if( res < 0)
+			{
+				res = i;
+			}
+			else
+			{
+				// We have seen one difference before, not mutation pair
+				return -1;
+			}
+		}
+	}
+	return res;
 }
 
-void GetNewSequences(const set<SEQUENCE> &setNewNodes,
-                     const set<SEQUENCE> &setExistingSeqs,
-                     vector<SEQUENCE> &seqNews) {
-  // Get the new nodes into a vector
-  for (set<SEQUENCE>::iterator it = setNewNodes.begin();
-       it != setNewNodes.end(); ++it) {
-    if (setExistingSeqs.find(*it) == setExistingSeqs.end()) {
-      seqNews.push_back(*it);
-    }
-  }
+int CalcSequencesDistance( const SEQUENCE &seq1, const SEQUENCE &seq2)
+{
+    // Count incompatible positions; a missing value is compatible with anything, so it adds no distance
+	int res = 0;
+	for(int i=0; i<seq1.size(); ++i)   // indexes seq2 by seq1's length; equal sizes are not checked here
+	{
+		if( IsTwoStatesCompatible( seq1[i] , seq2[i] ) == false )
+		{
+			res++;
+		}
+	}
+	return res;
+}
+
+void GetNewSequences(const set<SEQUENCE>& setNewNodes, const set<SEQUENCE>& setExistingSeqs,
+					 vector<SEQUENCE>& seqNews)
+{
+	// Append to seqNews every sequence of setNewNodes that is absent from setExistingSeqs (seqNews is not cleared)
+	for(set<SEQUENCE> :: iterator it = setNewNodes.begin(); it != setNewNodes.end(); ++it)
+	{
+		if( setExistingSeqs.find( *it) == setExistingSeqs.end() )
+		{
+			seqNews.push_back( *it );
+		}
+	}
 }
 //************************************************************************************************
 // Utilities for haplotyping
 //************************************************************************************************
 
-void GenHapRowsSetFromGenoRows(set<int> &hapRowsSet, int numGenoRows) {
-  hapRowsSet.clear();
-  for (int i = 0; i < 2 * numGenoRows; ++i) {
-    hapRowsSet.insert(i);
-  }
+void GenHapRowsSetFromGenoRows(set<int> &hapRowsSet, int numGenoRows )   // reset hapRowsSet to {0, ..., 2*numGenoRows-1}
+{
+    hapRowsSet.clear();
+    for(int i=0; i<2*numGenoRows; ++i)   // two row indices are generated per genotype row
+    {
+        hapRowsSet.insert( i );
+    }
 }
 
-bool IsTwoLabelSetsCompatible(const set<int> &partition,
-                              const vector<int> &genoSite, bool &fZeroOne) {
-  // Two 2-label sets are compatible when we treat 2i-1, 2i heterozygotes the
-  // same
-  int *tblEntryOccurs = new int[genoSite.size()];
-  for (int i = 0; i < genoSite.size(); ++i) {
-    tblEntryOccurs[i] = 0;
-  }
+bool IsTwoLabelSetsCompatible( const set<int> &partition,   const vector<int> &genoSite, bool &fZeroOne )   // fZeroOne is written only when true is returned
+{
+    // Two 2-label sets are compatible when we treat 2i-1, 2i heterozygotes the same
+    int *tblEntryOccurs = new int[genoSite.size()];
+    for(int i=0; i<genoSite.size(); ++i)
+    {
+        tblEntryOccurs[i] = 0;
+    }
 
-  for (set<int>::iterator it = partition.begin(); it != partition.end(); ++it) {
-    int r = *it;
-    tblEntryOccurs[r / 2]++;
-  }
+	for( set<int>::iterator it = partition.begin(); it!= partition.end(); ++it)
+    {
+        int r = *it;
+        tblEntryOccurs[r/2] ++;   // labels 2k and 2k+1 both count towards entry k
+    }
 
-  // We treat tblNeeded as the 0-partition
-  // We let tblNeeded to store zero-element occurance
-  int *tblNeeded = new int[genoSite.size()];
-  for (int i = 0; i < genoSite.size(); ++i) {
-    if (genoSite[i] == 2) {
-      tblNeeded[i] = 1;
-    } else if (genoSite[i] == 1) {
-      tblNeeded[i] = 2;
+    // We treat tblNeeded as the 0-partition; it stores the required occurrence count per entry.
+    // Value-initialized to 0 so entries whose genoSite is neither 1 nor 2 are never read uninitialized
+    int *tblNeeded = new int[genoSite.size()]();
+    for(int i=0; i<genoSite.size(); ++i)
+    {
+        if(genoSite[i] == 2)
+        {
+            tblNeeded[i] = 1;
+        }
+        else if( genoSite[i] == 1)
+        {
+            tblNeeded[i] = 2;
+        }
     }
-  }
 
-  // Now to see if it matches
-  int res = false;
+    // First possibility: the occurrence counts equal tblNeeded exactly
+    int res = false;
 
-  bool earlyBreak = false;
-  for (int i = 0; i < genoSite.size(); ++i) {
-    if (tblEntryOccurs[i] != tblNeeded[i]) {
-      earlyBreak = true;
-      break;
+    bool earlyBreak = false;
+    for(int i=0; i<genoSite.size(); ++i)
+    {
+        if( tblEntryOccurs[i] != tblNeeded[i] )
+        {
+            earlyBreak = true;
+            break;
+        }
+//        if(  tblNeeded[i] ==1 &&  tblNeeded[i] != tblEntryOccurs[i]  )
+//        {
+//            earlyBreak = true;
+//            break;
+//        }
+//        if( tblNeeded[i] != 1 &&  tblNeeded[i] != tblEntryOccurs[i] )
+//        {
+//            earlyBreak = true;
+//            break;
+//        }
     }
-    //        if(  tblNeeded[i] ==1 &&  tblNeeded[i] != tblEntryOccurs[i]  )
-    //        {
-    //            earlyBreak = true;
-    //            break;
-    //        }
-    //        if( tblNeeded[i] != 1 &&  tblNeeded[i] != tblEntryOccurs[i] )
-    //        {
-    //            earlyBreak = true;
-    //            break;
-    //        }
-  }
-  if (earlyBreak == false) {
-    fZeroOne = true;
-    res = true;
-  }
-
-  // check another possibility
-  if (res == false) {
-    earlyBreak = false;
-    for (int i = 0; i < genoSite.size(); ++i) {
-      if (tblEntryOccurs[i] != 2 - tblNeeded[i]) {
-        earlyBreak = true;
-        break;
-      }
-      //            if(  tblNeeded[i] ==1 &&  tblNeeded[i] != tblEntryOccurs[i]
-      //            )
-      //            {
-      //                earlyBreak = true;
-      //                break;
-      //            }
-      //            if( tblNeeded[i] != 1 &&  tblNeeded[i] == tblEntryOccurs[i]
-      //            )
-      //            {
-      //                earlyBreak = true;
-      //                break;
-      //            }
+    if(earlyBreak == false)
+    {
+        fZeroOne = true;
+        res = true;
     }
-    if (earlyBreak == false) {
-      fZeroOne = false; // indicate this is a zero set
-      res = true;
+
+
+    // Second possibility: the counts equal the complement, 2 - tblNeeded
+    if( res == false)
+    {
+        earlyBreak = false;
+        for(int i=0; i<genoSite.size(); ++i)
+        {
+            if( tblEntryOccurs[i] != 2 - tblNeeded[i] )
+            {
+                earlyBreak = true;
+                break;
+            }
+//            if(  tblNeeded[i] ==1 &&  tblNeeded[i] != tblEntryOccurs[i]  )
+//            {
+//                earlyBreak = true;
+//                break;
+//            }
+//            if( tblNeeded[i] != 1 &&  tblNeeded[i] == tblEntryOccurs[i] )
+//            {
+//                earlyBreak = true;
+//                break;
+//            }
+        }
+        if(earlyBreak == false)
+        {
+            fZeroOne = false;       // indicate this is a zero set
+            res = true;
+        }
+
     }
-  }
 
 #if 0
     for(int i=0; i<genoSite.size(); ++i)
@@ -1113,95 +1345,114 @@ bool IsTwoLabelSetsCompatible(const set<int> &partition,
         }
     }
 #endif
-  delete[] tblEntryOccurs;
-  delete[] tblNeeded;
-  return res;
-}
-
-void GenGenoPartitions(const vector<int> &genoSite, vector<int> &part0,
-                       vector<int> &part1) {
-  part0.clear();
-  part1.clear();
-  for (int i = 0; i < genoSite.size(); ++i) {
-    if (genoSite[i] == 2) {
-      // We simply put the first into part0
-      part0.push_back(2 * i);
-      part1.push_back(2 * i + 1);
-    } else if (genoSite[i] == 0) {
-      part0.push_back(2 * i);
-      part0.push_back(2 * i + 1);
-    } else if (genoSite[i] == 1) {
-      part1.push_back(2 * i);
-      part1.push_back(2 * i + 1);
+    delete [] tblEntryOccurs;
+    delete [] tblNeeded;
+    return res;
+}
+
+void GenGenoPartitions(const vector<int> &genoSite, vector<int> &part0, vector<int> &part1  )   // split labels 2i / 2i+1 into part0 and part1 by genotype value
+{
+    part0.clear();
+    part1.clear();
+    for(int i=0; i<genoSite.size(); ++i)
+    {
+        if( genoSite[i] == 2)
+        {
+            // Value 2: one label goes to each part; we simply put the first into part0
+            part0.push_back( 2*i );
+            part1.push_back( 2*i+1 );
+        }
+        else if( genoSite[i] == 0)   // value 0: both labels go to part0
+        {
+            part0.push_back( 2*i );
+            part0.push_back( 2*i+1 );
+        }
+        else if( genoSite[i] == 1)   // value 1: both labels go to part1; other values add nothing
+        {
+            part1.push_back( 2*i );
+            part1.push_back( 2*i+1 );
+        }
     }
-  }
 }
 
-bool Is2TwoLabelMatch(int lbla, int lblb) {
-  // The two label matches if the belong to the same geno row
-  return (lbla / 2 == lblb / 2);
+bool Is2TwoLabelMatch(int lbla, int lblb)
+{
+    // The two labels match if they belong to the same geno row (same value after integer division by 2)
+    return (lbla/2 == lblb/2);
 }
 
-bool IsTwoLabelSetContained(int genoLength, const vector<int> &setContainer,
-                            const vector<int> &setContained) {
-  if (setContained.size() > setContainer.size()) {
-    // cout << "IsTwoLabelSetContained: size mismatched.\n";
-    return false; // size mismatch
-  }
-  // cout << "setContainer: ";
-  // DumpIntVec( setContainer);
-  // cout << "setContained: ";
-  // DumpIntVec( setContained);
-
-  int *tblContainer = new int[genoLength];
-  int *tblContained = new int[genoLength];
-
-  // Init tbl
-  for (int i = 0; i < genoLength; ++i) {
-    tblContainer[i] = 0;
-    tblContained[i] = 0;
-  }
-  for (int i = 0; i < setContainer.size(); ++i) {
-    tblContainer[setContainer[i] / 2]++;
-  }
-  for (int i = 0; i < setContained.size(); ++i) {
-    tblContained[setContained[i] / 2]++;
-  }
-  // Check to see if one contain another
-  int res = true;
-  for (int i = 0; i < genoLength; ++i) {
-    if (tblContained[i] > tblContainer[i]) {
-      // cout << "Not contained!.\n";
-      res = false;
-      break;
+bool IsTwoLabelSetContained(int genoLength, const vector<int> &setContainer,   const vector<int> &setContained )   // per-row count containment test
+{
+    if(setContained.size() > setContainer.size())
+    {
+//cout << "IsTwoLabelSetContained: size mismatched.\n";
+        return false;   // a larger set cannot be contained
     }
-  }
+//cout << "setContainer: ";
+//DumpIntVec( setContainer);
+//cout << "setContained: ";
+//DumpIntVec( setContained);
+
+    int *tblContainer = new int[genoLength];
+    int *tblContained = new int[genoLength];
 
-  delete[] tblContainer;
-  delete[] tblContained;
-  return res;
+    // Zero both per-row occurrence tables
+    for(int i=0; i<genoLength; ++i)
+    {
+        tblContainer[i] = 0;
+        tblContained[i] = 0;
+    }
+    for(int i=0; i<setContainer.size(); ++i)
+    {
+        tblContainer[ setContainer[i] /2 ] ++;   // labels 2k and 2k+1 both count towards row k
+    }
+    for(int i=0; i<setContained.size(); ++i)
+    {
+        tblContained[ setContained[i] /2 ] ++;
+    }
+    // Contained only if no row occurs more often in setContained than in setContainer
+    int res = true;
+    for(int i=0; i<genoLength; ++i)
+    {
+        if(  tblContained[i] > tblContainer[i]  )
+        {
+//cout << "Not contained!.\n";
+            res = false;
+            break;
+        }
+    }
+
+
+    delete [] tblContainer;
+    delete [] tblContained;
+    return res;
 }
 
-void CalcGenoNum(int genoLength, const vector<int> &partition,
-                 vector<int> &genoNums) {
-  // cout << "CalcGenoNum: partition = ";
-  // DumpIntVec ( partition );
-  genoNums.clear();
-  for (int i = 0; i < genoLength; ++i) {
-    genoNums.push_back(0);
-  }
-  for (int i = 0; i < partition.size(); ++i) {
-    genoNums[partition[i] / 2]++;
-  }
+void CalcGenoNum(int genoLength, const vector<int> &partition, vector<int> &genoNums)   // count, per row k, the labels of partition with label/2 == k
+{
+//cout << "CalcGenoNum: partition = ";
+//DumpIntVec ( partition );
+    genoNums.clear();
+    for(int i=0; i<genoLength; ++i)
+    {
+        genoNums.push_back ( 0 );   // start with genoLength zero counters
+    }
+    for(int i=0; i<partition.size(); ++i)
+    {
+        genoNums[ partition[i]/2 ]++;   // assumes partition[i]/2 < genoLength -- not checked here
+    }
 }
 
-int Find2LabelOccNum(int lbl, const set<int> &setUniqeLables) {
-  int res = 0;
-  if (setUniqeLables.find(2 * lbl) != setUniqeLables.end()) {
-    ++res;
-  }
-  if (setUniqeLables.find(2 * lbl + 1) != setUniqeLables.end()) {
-    ++res;
-  }
-  return res;
+int Find2LabelOccNum(int lbl, const set<int> &setUniqeLables)   // returns 0, 1 or 2: how many of 2*lbl and 2*lbl+1 are in the set
+{
+    int res = 0;
+    if( setUniqeLables.find( 2*lbl) != setUniqeLables.end() )
+    {
+        ++res;
+    }
+    if( setUniqeLables.find( 2*lbl+1) != setUniqeLables.end() )
+    {
+        ++res;
+    }
+    return res;
 }
diff --git a/trisicell/external/scistree/Utils.h b/trisicell/external/scistree/Utils.h
index d07f8fc..4fcea61 100644
--- a/trisicell/external/scistree/Utils.h
+++ b/trisicell/external/scistree/Utils.h
@@ -1,19 +1,20 @@
 #ifndef UTILS_H
 #define UTILS_H
 
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <fstream>
-#include <iostream>
 #include <list>
-#include <map>
+#include <vector>
 #include <set>
+#include <map>
 #include <string>
-#include <vector>
+#include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
 //#include <limits>
 using namespace std;
 
+
 // ***************************************************************************
 // Common utilities
 // ***************************************************************************
@@ -21,61 +22,59 @@ using namespace std;
 #define DEBUG(x)
 
 // Important structure
-typedef pair<int, int> INTERVAL;
-// typedef numeric_limits<int> HAP_MAX_INT;
-#define HAP_MAX_INT 0xFFFFFFF
-#define MISSING_VALUE_BIT 9 // pretty arbitary setting
+typedef pair<int,int> INTERVAL;
+//typedef numeric_limits<int> HAP_MAX_INT;
+#define HAP_MAX_INT             0xFFFFFFF
+#define MISSING_VALUE_BIT    9          // pretty arbitary setting
 
-void JoinSets(const set<int> &s1, const set<int> &s2, set<int> &res);
+void JoinSets( const set<int> &s1, const set<int> &s2, set<int> &res);
 void SubtractSets(set<int> &s1, const set<int> &s2);
 void UnionSets(set<int> &sTotal, const set<int> &sToBeAdd);
 
 // template version of these popular methods
-// template <class T> void JoinSets( const set<T> &s1, const set<T> &s2, set<T>
-// &res); template <class T> void SubtractSets(set<T> &s1, const set<T> &s2);
-// template <class T> void UnionSets(set<T> &sTotal, const set<T> &sToBeAdd);
-// template <class T> void DumpSet( const set<T> &s);
-void JoinSets(const set<char> &s1, const set<char> &s2, set<char> &res);
+//template <class T> void JoinSets( const set<T> &s1, const set<T> &s2, set<T> &res);
+//template <class T> void SubtractSets(set<T> &s1, const set<T> &s2);
+//template <class T> void UnionSets(set<T> &sTotal, const set<T> &sToBeAdd);
+//template <class T> void DumpSet( const set<T> &s);
+void JoinSets( const set<char> &s1, const set<char> &s2, set<char> &res);
 void SubtractSets(set<char> &s1, const set<char> &s2);
 void UnionSets(set<char> &sTotal, const set<char> &sToBeAdd);
-void DumpSet(const set<char> &s);
-void ConvIntSetToCharSet(const set<int> &si, set<char> &sc);
-void ConvCharSetToIntSet(const set<char> &sc, set<int> &si);
+void DumpSet( const set<char> &s);
+void ConvIntSetToCharSet( const set<int> &si, set<char> &sc );
+void ConvCharSetToIntSet( const set<char> &sc, set<int> &si );
 
-void RmIntValFromSet(set<int> &s, int v);
+void RmIntValFromSet( set<int> &s, int v);
 void DumpIntSet(const set<int> &incSet);
 void DumpIntSetNoReturn(const set<int> &incSet);
 void DumpIntVec(const vector<int> &intVec);
-void PopulateSetByVec(set<int> &dest, const vector<int> &srcVec);
-void PopulateVecBySet(vector<int> &dest, const set<int> &srcSet);
-void CopyIntSet(set<int> &dest, const set<int> &src);
-void CopyIntVec(vector<int> &dest, const vector<int> &src);
-void CopySetIntVec(set<vector<int> > &dest, const set<vector<int> > &src);
-bool IsVecSame(const vector<int> &v1, const vector<int> &v2);
-bool IsIntVecInSet(const set<vector<int> > &s, const vector<int> &v);
-void ConvIntToVec(unsigned int val, vector<int> &vec, int numBits);
-unsigned int ConvVecToInt(const vector<int> &vec);
-
-void ConvIntToVecMSB(unsigned int val, vector<int> &vec, int numBits);
-unsigned int ConvVecToIntMSB(const vector<int> &vec);
-void ReverseIntVec(vector<int> &vec);
-
-unsigned int CalcBitInt(int pos, int width);
-bool GetNextEnumVec(vector<int> &curPos, const vector<int> &limitvec);
+void PopulateSetByVec( set<int> &dest, const vector<int> &srcVec);
+void PopulateVecBySet( vector<int> &dest, const set<int> &srcSet);
+void CopyIntSet(set<int> & dest, const set<int> &src);
+void CopyIntVec(vector<int> & dest, const vector<int> &src);
+void CopySetIntVec( set< vector<int> > &dest, const set< vector<int> > &src);
+bool IsVecSame( const vector<int> &v1, const vector<int> &v2);
+bool IsIntVecInSet ( const set< vector<int> > &s, const vector<int> &v);
+void ConvIntToVec( unsigned int val, vector<int> &vec, int numBits);
+unsigned int ConvVecToInt( const vector<int> &vec);
+
+void ConvIntToVecMSB( unsigned int val, vector<int> &vec, int numBits);
+unsigned int ConvVecToIntMSB( const vector<int> &vec);
+void ReverseIntVec( vector<int> &vec);
+
+unsigned int CalcBitInt( int pos, int width );
+bool GetNextEnumVec( vector<int>& curPos, const vector<int> &limitvec);
 void YW_ASSERT(bool f);
-void YW_ASSERT_INFO(bool f, const char *);
-void RemoveFromIntSet(vector<int> &targetSet, int val);
-bool IsIntSetEquiv(const set<vector<int> > &s1, const set<vector<int> > &s2);
-void OrderInt(int &i1, int &i2);
-void SortIntVec(vector<int> &vec, int start = 0, int end = -1);
-double GetRandFraction();
-int CalcCompositeBound(map<INTERVAL, int> &mapIntervalBds, int left, int right,
-                       vector<int> &locBreakpoints);
-void OutputBounds(char *boundsFileName, map<INTERVAL, int> &mapIntervalBds,
-                  int nSites);
-int ConvertToLinear(int r1, int r2, int nRows);
-int ConvertToLinearEq(int r1, int r2, int nRows);
-// void ConvertLinearToTwoIndices( int idLinear, int nRows, int &r1, int &r2 );
+void YW_ASSERT_INFO( bool f, const char *);
+void RemoveFromIntSet( vector<int> &targetSet, int val );
+bool IsIntSetEquiv(const set< vector<int> > &s1, const set< vector<int> >&s2);
+void OrderInt( int&i1, int &i2);
+void SortIntVec( vector<int> &vec, int start = 0, int end = - 1);
+double GetRandFraction( );
+int CalcCompositeBound( map<INTERVAL, int>& mapIntervalBds, int left, int right, vector<int> &locBreakpoints );
+void  OutputBounds(char *boundsFileName, map<INTERVAL, int>& mapIntervalBds, int nSites);
+int ConvertToLinear(int r1, int r2, int nRows) ;
+int ConvertToLinearEq(int r1, int r2, int nRows) ;
+//void ConvertLinearToTwoIndices( int idLinear, int nRows, int &r1, int &r2 );
 
 #if 0
 typedef struct
@@ -86,12 +85,12 @@ typedef struct
 #endif
 
 // Some utilities when doing permutations/combinations
-void GetFirstCombo(int k, int n, vector<int> &posvec);
-bool GetNextCombo(int k, int n, vector<int> &posvec);
+void GetFirstCombo(int k, int n, vector<int>& posvec);
+bool GetNextCombo(int k, int n, vector<int>& posvec);
 bool GetNextComboFrom(int k, int n, vector<int> &posvec, int startpos);
-void GetBoolVec(int num, const vector<int> &posvec, vector<bool> &bvec);
-void GetIntVec(int num, const vector<int> &posvec, vector<int> &bvec);
-void InitPermutation(vector<int> &nvec, const vector<int> &reference);
+void GetBoolVec(int num, const vector<int> &posvec, vector<bool>& bvec);
+void GetIntVec(int num, const vector<int> &posvec, vector<int>& bvec);
+void InitPermutation( vector<int> &nvec, const vector<int> &reference);
 bool GetNextPermutation(vector<int> &nvec, const vector<int> &reference);
 
 //************************************************************************************************
@@ -99,47 +98,37 @@ bool GetNextPermutation(vector<int> &nvec, const vector<int> &reference);
 //************************************************************************************************
 typedef vector<int> SEQUENCE;
 
-bool IsMissingValueBit(int bit);
-int GetMissingValueBit();
-bool IsSeqHasMV(const SEQUENCE &seq);
-void FillVecWithMV(SEQUENCE &seq, int len);
+bool IsMissingValueBit( int bit );
+int GetMissingValueBit( );
+bool IsSeqHasMV( const SEQUENCE &seq );
+void FillVecWithMV( SEQUENCE &seq, int len);
 bool IsTwoStatesCompatible(int bit1, int bit2);
-bool AreTwoSeqsCompatible(const SEQUENCE &seq1, const SEQUENCE &seq2);
-void GetCompatibleSeqForTwo(const SEQUENCE &seq1, const SEQUENCE &seq2,
-                            SEQUENCE &consensus);
+bool AreTwoSeqsCompatible( const SEQUENCE &seq1, const SEQUENCE &seq2);
+void GetCompatibleSeqForTwo( const SEQUENCE &seq1, const SEQUENCE &seq2, SEQUENCE &consensus);
 void MutateSeqAtSite(SEQUENCE &seq, int site);
-void RecombSequencesAt(const SEQUENCE &s1, const SEQUENCE &s2, int brPt,
-                       SEQUENCE &sr);
-bool IsSeqRecombinnable(const SEQUENCE &s1, const SEQUENCE &s2,
-                        const SEQUENCE &st);
-bool IsSeqRecombinnableIV(const SEQUENCE &s1, const SEQUENCE &s2,
-                          const SEQUENCE &st, INTERVAL &iv);
-void AddUniqueSeqToVec(const SEQUENCE &seq, vector<SEQUENCE> &vecSeqs);
-bool IsSeqInVec(const SEQUENCE &seq, const vector<SEQUENCE> &vecSeqs);
-bool IsSeqInSet(const SEQUENCE &seq, const set<SEQUENCE> &vecSeqs);
-void GetEqualSubseq(const SEQUENCE &seq1, const SEQUENCE &seq2, int seedPos,
-                    int &left, int &right);
-int CompareSegments(const SEQUENCE &seq, const SEQUENCE &targetSeq, int left,
-                    int right);
-int IsSeqsMutPair(const SEQUENCE &seq1, const SEQUENCE &seq2);
-int CalcSequencesDistance(const SEQUENCE &seq1, const SEQUENCE &seq2);
-void GetNewSequences(const set<SEQUENCE> &setNewNodes,
-                     const set<SEQUENCE> &setExistingSeqs,
-                     vector<SEQUENCE> &seqNews);
+void RecombSequencesAt(const SEQUENCE &s1, const SEQUENCE &s2, int brPt, SEQUENCE &sr);
+bool IsSeqRecombinnable(const SEQUENCE & s1, const SEQUENCE & s2, const SEQUENCE & st) ;
+bool IsSeqRecombinnableIV(const SEQUENCE & s1, const SEQUENCE & s2, const SEQUENCE & st, INTERVAL &iv) ;
+void AddUniqueSeqToVec( const SEQUENCE &seq, vector<SEQUENCE> &vecSeqs);
+bool IsSeqInVec( const SEQUENCE &seq, const vector<SEQUENCE> &vecSeqs);
+bool IsSeqInSet( const SEQUENCE &seq, const set<SEQUENCE> &vecSeqs);
+void GetEqualSubseq(const SEQUENCE &seq1, const SEQUENCE &seq2, int seedPos, int &left, int &right);
+int  CompareSegments(const SEQUENCE &  seq, const SEQUENCE &targetSeq, int left, int right );
+int IsSeqsMutPair( const SEQUENCE &seq1, const SEQUENCE &seq2);
+int CalcSequencesDistance( const SEQUENCE &seq1, const SEQUENCE &seq2);
+void GetNewSequences(const set<SEQUENCE>& setNewNodes, const set<SEQUENCE>& setExistingSeqs,
+					 vector<SEQUENCE>& seqNews);
 
 //************************************************************************************************
 // Utilities for haplotyping
 //************************************************************************************************
-void GenHapRowsSetFromGenoRows(set<int> &hapRowsSet, int numGenoRows);
-bool IsTwoLabelSetsCompatible(const set<int> &partition,
-                              const vector<int> &genoSite, bool &fZeroOne);
-void GenGenoPartitions(const vector<int> &genoSite, vector<int> &part1,
-                       vector<int> &part2);
+void GenHapRowsSetFromGenoRows(set<int> &hapRowsSet, int numGenoRows );
+bool IsTwoLabelSetsCompatible( const set<int> &partition,   const vector<int> &genoSite, bool &fZeroOne );
+void GenGenoPartitions(const vector<int> &genoSite, vector<int> &part1, vector<int> &part2  );
 bool Is2TwoLabelMatch(int lbla, int lblb);
-bool IsTwoLabelSetContained(int genoLength, const vector<int> &setContainer,
-                            const vector<int> &setContained);
-void CalcGenoNum(int genoLength, const vector<int> &partition,
-                 vector<int> &genoNums);
+bool IsTwoLabelSetContained( int genoLength, const vector<int> &setContainer,   const vector<int> &setContained );
+void CalcGenoNum(int genoLength, const vector<int> &partition, vector<int> &genoNums);
 int Find2LabelOccNum(int lbl, const set<int> &setUniqeLables);
 
-#endif // UTILS_H
+
+#endif //UTILS_H
diff --git a/trisicell/external/scistree/Utils2.cpp b/trisicell/external/scistree/Utils2.cpp
index 9871c4a..3a2b5b4 100644
--- a/trisicell/external/scistree/Utils2.cpp
+++ b/trisicell/external/scistree/Utils2.cpp
@@ -1,636 +1,768 @@
-#include "Utils2.h"
 #include "Utils.h"
-#include "cstdio"
-#include "cstdlib"
+#include "Utils2.h"
 #include "ctime"
+#include "cstdlib"
+#include "cstdio"
 
-//////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
 // Utility functions
-//////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
 
-long GetCurrentTimeTick() { return (long)time(NULL); }
+long GetCurrentTimeTick()
+{
+    return (long)time(NULL);
+}
 
-long GetElapseTime(long lastTime) {
-  return (long)((long)time(NULL) - lastTime);
+long GetElapseTime(long lastTime)
+{
+    return (long)((long)time(NULL) - lastTime);
 }
 
-void GetCurrentCPUTime(std::clock_t &tmStart) {
-  //
-  tmStart = std::clock();
+void GetCurrentCPUTime(std::clock_t &tmStart)
+{
+    //
+    tmStart = std::clock();
 }
 
-double GetElapseCPUTime(const std::clock_t &tmStart) {
-  //
-  return (std::clock() - tmStart) / (double)CLOCKS_PER_SEC;
+double GetElapseCPUTime(const std::clock_t &tmStart)
+{
+    //
+    return (std::clock() - tmStart) / (double)CLOCKS_PER_SEC;
 }
 
-bool IsBoolArrayAllTrue(bool *bArray, int size) {
-  for (int i = 0; i < size; ++i) {
-    if (bArray[i] == false) {
-      return false;
+bool IsBoolArrayAllTrue(bool *bArray, int size)
+{
+    for (int i = 0; i < size; ++i)
+    {
+        if (bArray[i] == false)
+        {
+            return false;
+        }
     }
-  }
-  return true;
+    return true;
 }
 
-void AppendIntVec(vector<int> &dest, const vector<int> &appending) {
-  for (unsigned int i = 0; i < appending.size(); ++i) {
-    dest.push_back(appending[i]);
-  }
+void AppendIntVec(vector<int> &dest, const vector<int> &appending)
+{
+    for (unsigned int i = 0; i < appending.size(); ++i)
+    {
+        dest.push_back(appending[i]);
+    }
 }
 
-bool IsSetContainer(const set<int> &container, const set<int> &contained) {
-  for (set<int>::iterator it = contained.begin(); it != contained.end(); ++it) {
-    if (container.find(*it) == container.end()) {
-      return false;
+bool IsSetContainer(const set<int> &container, const set<int> &contained)
+{
+    for (set<int>::iterator it = contained.begin(); it != contained.end(); ++it)
+    {
+        if (container.find(*it) == container.end())
+        {
+            return false;
+        }
     }
-  }
-  return true;
+    return true;
 }
 
-bool IsSetContainedInSets(const set<int> &s, const set<set<int> > &sets) {
-  // This function return true if ONE of the sets in sets contains s, false
-  // otherwise
-  for (set<set<int> >::iterator it = sets.begin(); it != sets.end(); ++it) {
-    if (IsSetContainer(*it, s) == true) {
-      return true;
+bool IsSetContainedInSets(const set<int> &s, const set<set<int>> &sets)
+{
+    // This function return true if ONE of the sets in sets contains s, false otherwise
+    for (set<set<int>>::iterator it = sets.begin(); it != sets.end(); ++it)
+    {
+        if (IsSetContainer(*it, s) == true)
+        {
+            return true;
+        }
     }
-  }
-  return false;
+    return false;
 }
 
-bool IsSetContainingOneOfSets(const set<int> &s, const set<set<int> > &sets) {
-  // This function return true if ONE of the sets in sets contains s, false
-  // otherwise
-  for (set<set<int> >::iterator it = sets.begin(); it != sets.end(); ++it) {
-    if (IsSetContainer(s, *it) == true) {
-      return true;
+bool IsSetContainingOneOfSets(const set<int> &s, const set<set<int>> &sets)
+{
+    // Returns true if s contains (is a superset of) at least ONE of the sets in sets, false otherwise
+    for (set<set<int>>::iterator it = sets.begin(); it != sets.end(); ++it)
+    {
+        if (IsSetContainer(s, *it) == true)
+        {
+            return true;
+        }
     }
-  }
-  return false;
+    return false;
 }
 
-void ConcatIntVec(vector<int> &vecAdded, const vector<int> &vecToAdd) {
-  // Append one vector to another
-  for (unsigned int i = 0; i < vecToAdd.size(); i++) {
-    vecAdded.push_back(vecToAdd[i]);
-  }
+void ConcatIntVec(vector<int> &vecAdded, const vector<int> &vecToAdd)
+{
+    // Append one vector to another
+    for (unsigned int i = 0; i < vecToAdd.size(); i++)
+    {
+        vecAdded.push_back(vecToAdd[i]);
+    }
+}
+
+int ConvIntSetToPosition(const set<int> &s)
+{
+    //cout << "In ConvIntSetToPosition: s = ";
+    //DumpIntSet( s );
+    // this function convert an integer set to a position index, for example,
+    // if range = 8, s={2, 4}, then this converts to 00010100
+    int res = 0;
+    for (set<int>::iterator it = s.begin(); it != s.end(); ++it)
+    {
+        int a = *it;
+        int mask = 0x1 << a;
+        res = res | mask;
+    }
+    //cout << "conversion res = " << res << endl;
+    return res;
+}
+
+void ConvPositionToIntSet(int val, set<int> &s)
+{
+    // inverse to ConvIntSetToPosition: s receives the index of every bit that is set in val
+    s.clear();
+    int pos = 0;
+    while (val != 0)
+    {
+        if ((val & 0x1) != 0)
+        {
+            s.insert(pos);
+        }
+        pos++;
+        // logical right-shift (through unsigned) so that a negative val (bit 31 set) still reaches 0
+        val = (int)((unsigned int)val >> 1);
+    }
 }
 
-int ConvIntSetToPosition(const set<int> &s) {
-  // cout << "In ConvIntSetToPosition: s = ";
-  // DumpIntSet( s );
-  // this function convert an integer set to a position index, for example,
-  // if range = 8, s={2, 4}, then this converts to 00010100
-  int res = 0;
-  for (set<int>::iterator it = s.begin(); it != s.end(); ++it) {
-    int a = *it;
-    int mask = 0x1 << a;
-    res = res | mask;
-  }
-  // cout << "conversion res = " << res << endl;
-  return res;
+void PopulateSetWithInterval(set<int> &s, int left, int right)
+{
+    s.clear();
+    for (int i = left; i <= right; ++i)
+    {
+        s.insert(i);
+    }
 }
 
-void ConvPositionToIntSet(int val, set<int> &s) {
-  // inverse to ConvIntSetToPosition: convert an integer back to a set
-  s.clear();
-  int pos = 0;
-  while (val != 0) {
-    if ((val & 0x1) != 0) {
-      s.insert(pos);
+void GetSeqInterval(const SEQUENCE &row, SEQUENCE &rowIV, int left, int right)
+{
+    rowIV.clear();
+    for (int i = left; i <= right; ++i)
+    {
+        rowIV.push_back(row[i]);
     }
-    pos++;
-    // left-shift val
-    val = (val >> 1);
-  }
 }
 
-void PopulateSetWithInterval(set<int> &s, int left, int right) {
-  s.clear();
-  for (int i = left; i <= right; ++i) {
-    s.insert(i);
-  }
+bool IsIntervalContained(const set<SEQUENCE> &seqs, int left, int right, const SEQUENCE &seqIV)
+{
+    // This function check to see if the seqIV is contained in the seqs
+    // when there is missing site, we use COMPABILITY instead of ==
+    for (set<SEQUENCE>::iterator it = seqs.begin(); it != seqs.end(); ++it)
+    {
+        SEQUENCE substr;
+        GetSeqInterval(*it, substr, left, right);
+        if (AreTwoSeqsCompatible(substr, seqIV) == true)
+        {
+            return true;
+        }
+    }
+    return false;
 }
 
-void GetSeqInterval(const SEQUENCE &row, SEQUENCE &rowIV, int left, int right) {
-  rowIV.clear();
-  for (int i = left; i <= right; ++i) {
-    rowIV.push_back(row[i]);
-  }
+void SubtractSequenceSets(set<SEQUENCE> &s1, const set<SEQUENCE> &s2)
+{
+    if (s2.size() == 0)
+    {
+        return;
+    }
+    set<SEQUENCE> res;
+    // this function performs set difference, i.e. s1 = s1 - s2 (keep only elements of s1 not found in s2)
+    for (set<SEQUENCE>::iterator it = s1.begin(); it != s1.end(); ++it)
+    {
+        if (s2.find(*it) == s2.end())
+        {
+            res.insert(*it);
+        }
+    }
+    s1.clear();
+    s1 = res;
 }
 
-bool IsIntervalContained(const set<SEQUENCE> &seqs, int left, int right,
-                         const SEQUENCE &seqIV) {
-  // This function check to see if the seqIV is contained in the seqs
-  // when there is missing site, we use COMPABILITY instead of ==
-  for (set<SEQUENCE>::iterator it = seqs.begin(); it != seqs.end(); ++it) {
-    SEQUENCE substr;
-    GetSeqInterval(*it, substr, left, right);
-    if (AreTwoSeqsCompatible(substr, seqIV) == true) {
-      return true;
+void DumpSequence(const SEQUENCE &seq)
+{
+    for (unsigned int i = 0; i < seq.size(); ++i)
+    {
+        if (IsMissingValueBit(seq[i]) == false)
+        {
+            cout << seq[i];
+        }
+        else
+        {
+            cout << "*";
+        }
     }
-  }
-  return false;
+    cout << endl;
 }
 
-void SubtractSequenceSets(set<SEQUENCE> &s1, const set<SEQUENCE> &s2) {
-  if (s2.size() == 0) {
-    return;
-  }
-  set<SEQUENCE> res;
-  // this function performs set intersection, i.e. s1=s1 ^s2
-  for (set<SEQUENCE>::iterator it = s1.begin(); it != s1.end(); ++it) {
-    if (s2.find(*it) == s2.end()) {
-      res.insert(*it);
+void DumpVecSequences(const vector<SEQUENCE> &vecSeqs)
+{
+    for (unsigned int i = 0; i < vecSeqs.size(); ++i)
+    {
+        DumpSequence(vecSeqs[i]);
     }
-  }
-  s1.clear();
-  s1 = res;
 }
-
-void DumpSequence(const SEQUENCE &seq) {
-  for (unsigned int i = 0; i < seq.size(); ++i) {
-    if (IsMissingValueBit(seq[i]) == false) {
-      cout << seq[i];
-    } else {
-      cout << "*";
+
+void DumpSetSequences(const set<SEQUENCE> &setSeqs)
+{
+    for (set<SEQUENCE>::iterator it = setSeqs.begin(); it != setSeqs.end(); ++it)
+    {
+        DumpSequence(*it);
     }
-  }
-  cout << endl;
 }
-
-void DumpVecSequences(const vector<SEQUENCE> &vecSeqs) {
-  for (unsigned int i = 0; i < vecSeqs.size(); ++i) {
-    DumpSequence(vecSeqs[i]);
-  }
+
+bool AreTwoInSameSet(int i1, int i2, const set<set<int>> &collections)
+{
+    // Check to see if i1 and i2 is in same set
+    for (set<set<int>>::iterator it = collections.begin(); it != collections.end(); ++it)
+    {
+        bool found1 = false, found2 = false;
+        if (it->find(i1) != it->end())
+        {
+            found1 = true;
+        }
+        if (it->find(i2) != it->end())
+        {
+            found2 = true;
+        }
+        if (found1 == true && found2 == true)
+        {
+            //cout << "i1 = " << i1 << ", i2 = " << i2 << " INDDED in same set.\n";
+            return true;
+        }
+        if (found1 || found2)
+        {
+            //cout << "i1 = " << i1 << ", i2 = " << i2 << " not in same set.\n";
+            return false;
+        }
+    }
+    // should not need this, in case
+    YW_ASSERT_INFO(false, "Bad i1 or i2.");
+    return false;
 }
 
-void DumpSetSequences(const set<SEQUENCE> &setSeqs) {
-  for (set<SEQUENCE>::iterator it = setSeqs.begin(); it != setSeqs.end();
-       ++it) {
-    DumpSequence(*it);
-  }
-}
-
-bool AreTwoInSameSet(int i1, int i2, const set<set<int> > &collections) {
-  // Check to see if i1 and i2 is in same set
-  for (set<set<int> >::iterator it = collections.begin();
-       it != collections.end(); ++it) {
-    bool found1 = false, found2 = false;
-    if (it->find(i1) != it->end()) {
-      found1 = true;
-    }
-    if (it->find(i2) != it->end()) {
-      found2 = true;
+int GetItemIndexInVec(const vector<int> &vec, int item)
+{
+    for (unsigned int i = 0; i < vec.size(); ++i)
+    {
+        if (vec[i] == item)
+        {
+            return i;
+        }
     }
-    if (found1 == true && found2 == true) {
-      // cout << "i1 = " << i1 << ", i2 = " << i2 << " INDDED in same set.\n";
-      return true;
+    return -1;
+}
+
+bool IsIntervalOverlap(const INTERVAL &iv1, const INTERVAL &iv2)
+{
+    if (iv1.second < iv2.first || iv2.second < iv1.first)
+    {
+        return false;
     }
-    if (found1 || found2) {
-      // cout << "i1 = " << i1 << ", i2 = " << i2 << " not in same set.\n";
-      return false;
+    else
+    {
+        return true;
     }
-  }
-  // should not need this, in case
-  YW_ASSERT_INFO(false, "Bad i1 or i2.");
-  return false;
 }
 
-int GetItemIndexInVec(const vector<int> &vec, int item) {
-  for (unsigned int i = 0; i < vec.size(); ++i) {
-    if (vec[i] == item) {
-      return i;
+bool GetIntervalOverlap(const INTERVAL &iv1, const INTERVAL &iv2, INTERVAL &ivBoth)
+{
+    int left = iv1.first;
+    if (left < iv2.first)
+    {
+        left = iv2.first;
+    }
+    int right = iv1.second;
+    if (right > iv2.second)
+    {
+        right = iv2.second;
+    }
+    if (left > right)
+    {
+        return false;
+    }
+    else
+    {
+        ivBoth.first = left;
+        ivBoth.second = right;
+        return true;
     }
-  }
-  return -1;
 }
 
-bool IsIntervalOverlap(const INTERVAL &iv1, const INTERVAL &iv2) {
-  if (iv1.second < iv2.first || iv2.second < iv1.first) {
-    return false;
-  } else {
+void GenerateRandBinVector(int sz, vector<int> &randVec)
+{
+    //cout << "GenerateRandBinVector: sz = " << sz << endl;
+    // Generate random vector
+    randVec.clear();
+    for (int i = 0; i < sz; ++i)
+    {
+        //cout << " i = " << i << endl;
+        double r = GetRandFraction();
+        if (r >= 0.5)
+        {
+            randVec.push_back(0);
+        }
+        else
+        {
+            randVec.push_back(1);
+        }
+    }
+}
+bool IsBinary(int val)
+{
+    if (val == 0 || val == 1)
+    {
+        return true;
+    }
+    else
+    {
+        return false;
+    }
+}
+void ReOrderWithRemovedSites(const vector<int> &posAfterRem,
+                             const vector<int> &removedPos, vector<int> &posBeforeRemove)
+{
+    // This function is often used here.
+    // For example, we often remove sites from the matrix but then need to know their original positions.
+    // This function accounts for that by adding the removed sites back into order (not directly into
+    // posBeforeRemove) but rather counting them as an offset when adding each remaining position.
+    posBeforeRemove.clear();
+
+    unsigned int pos = 0;
+    for (unsigned int i = 0; i < posAfterRem.size(); ++i)
+    {
+        while (pos < removedPos.size() && posAfterRem[i] + (int)pos >= removedPos[pos])
+        {
+            pos++;
+        }
+        posBeforeRemove.push_back(posAfterRem[i] + pos);
+    }
+}
+
+void GetSubsetVec(const vector<int> &vecOriginal, const set<int> &sitesToKeep, vector<int> &vecNew)
+{
+    vecNew.clear();
+    for (unsigned int i = 0; i < vecOriginal.size(); ++i)
+    {
+        if (sitesToKeep.find(i) != sitesToKeep.end())
+        {
+            vecNew.push_back(vecOriginal[i]);
+        }
+    }
+}
+
+void AddMissingVecBits(vector<int> &rowOrig, const set<int> &sitesToAdd, vector<int> &partialRow)
+{
+    YW_ASSERT_INFO(sitesToAdd.size() == partialRow.size(), "Parameter size mismatch");
+
+    // If there is nothing to work on, stop
+    if (sitesToAdd.size() == 0)
+    {
+        return;
+    }
+
+    cout << "AddMissingVecBits: rowOrig = ";
+    DumpSequence(rowOrig);
+    cout << "Append sites ";
+    DumpIntSet(sitesToAdd);
+    cout << "Missing values = ";
+    DumpIntVec(partialRow);
+    // Here we try to add back some missing sites
+    vector<int> missingSites;
+    PopulateVecBySet(missingSites, sitesToAdd);
+
+    vector<int> res;
+    int posMiss = 0;
+    int posOrig = 0;
+    int curpos = 0;
+
+    while (posMiss < (int)partialRow.size() || posOrig < (int)rowOrig.size())
+    {
+        // pick the source of this bit; once all missing sites are placed, never index past missingSites
+        if (posMiss >= (int)missingSites.size() || curpos != missingSites[posMiss])
+        {
+            // This bit is original
+            YW_ASSERT_INFO(posOrig < (int)rowOrig.size(), "Serious error: not enough bits.");
+            res.push_back(rowOrig[posOrig]);
+            posOrig++;
+        }
+        else
+        {
+            // No this is a missing bit
+            res.push_back(partialRow[posMiss]);
+            posMiss++;
+            // posMiss now indexes the next site still to be re-inserted (may equal missingSites.size())
+        }
+
+        // now move on
+        curpos++;
+    }
+    rowOrig = res;
+    cout << "AddMissingVecBits: res = ";
+    DumpSequence(rowOrig);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+bool IsSequenceHaplotype(const SEQUENCE &seq)
+{
+    // note need to consider missing value!
+    for (unsigned int i = 0; i < seq.size(); ++i)
+    {
+        if (seq[i] != 0 && seq[i] != 1 && IsMissingValueBit(seq[i]) == false)
+        {
+            return false;
+        }
+    }
     return true;
-  }
-}
-
-bool GetIntervalOverlap(const INTERVAL &iv1, const INTERVAL &iv2,
-                        INTERVAL &ivBoth) {
-  int left = iv1.first;
-  if (left < iv2.first) {
-    left = iv2.first;
-  }
-  int right = iv1.second;
-  if (right > iv2.second) {
-    right = iv2.second;
-  }
-  if (left > right) {
-    return false;
-  } else {
-    ivBoth.first = left;
-    ivBoth.second = right;
+}
+
+bool IsSequenceGenotype(const SEQUENCE &seq)
+{
+    for (unsigned int i = 0; i < seq.size(); ++i)
+    {
+        if (seq[i] != 0 && seq[i] != 1 && seq[i] != 2 && IsMissingValueBit(seq[i]) == false)
+        {
+            return false;
+        }
+    }
     return true;
-  }
 }
 
-void GenerateRandBinVector(int sz, vector<int> &randVec) {
-  // cout << "GenerateRandBinVector: sz = " << sz << endl;
-  // Generate random vector
-  randVec.clear();
-  for (int i = 0; i < sz; ++i) {
-    // cout << " i = " << i << endl;
-    double r = GetRandFraction();
-    if (r >= 0.5) {
-      randVec.push_back(0);
-    } else {
-      randVec.push_back(1);
+bool CanPhaseGenoRow(const SEQUENCE &hap1, const SEQUENCE &hap2, const SEQUENCE &geno)
+{
+    YW_ASSERT_INFO(IsSequenceHaplotype(hap1), "hap1 is not haplotype row.");
+    YW_ASSERT_INFO(IsSequenceHaplotype(hap2), "hap2 is not haplotype row.");
+    YW_ASSERT_INFO(IsSequenceGenotype(geno), "geno is not genotype row.");
+    YW_ASSERT_INFO(hap1.size() == hap2.size(), "Tow hap rows are not equal length");
+    YW_ASSERT_INFO(hap1.size() == geno.size(), "Geno row is not the same size as hap row.");
+    // for now, do not allow hap1/hap2 to contain missing values
+    YW_ASSERT_INFO(IsSeqHasMV(hap1) == false && IsSeqHasMV(hap2) == false, "Hap1/Hap2 can not contain missing values");
+    //cout << "hap1 = ";
+    //DumpIntVec(hap1 );
+    //cout << "hap2 = ";
+    //DumpIntVec(hap2 );
+    //cout << "geno = ";
+    //DumpIntVec( geno );
+    for (unsigned int i = 0; i < hap1.size(); i++)
+    {
+        // a missing value in geno can be phased either way
+        if (IsMissingValueBit(geno[i]) == true)
+        {
+            continue;
+        }
+
+        if (geno[i] == 2)
+        {
+            if (hap1[i] + hap2[i] != 1)
+            {
+                return false;
+            }
+        }
+        else
+        {
+            if (hap1[i] + hap2[i] != 2 * geno[i])
+            {
+                return false;
+            }
+        }
     }
-  }
+    return true;
 }
-bool IsBinary(int val) {
-  if (val == 0 || val == 1) {
+
+bool AreHapGenoRowCompatible(const SEQUENCE &hapRow, const SEQUENCE &genoRow, SEQUENCE *pComplement)
+{
+    if (pComplement != NULL)
+    {
+        pComplement->clear();
+    }
+
+    // Check if the haplotype row can be a phasing of the geno row; optionally build the other haplotype
+    YW_ASSERT_INFO(IsSequenceHaplotype(hapRow), "hap is not haplotype row.");
+    YW_ASSERT_INFO(IsSequenceGenotype(genoRow), "genorow is not genotype row.");
+    for (unsigned int i = 0; i < hapRow.size(); i++)
+    {
+        // if either one is missing value, they match! (nothing is appended to *pComplement for this site)
+        if (IsMissingValueBit(genoRow[i]) == true || IsMissingValueBit(hapRow[i]) == true)
+        {
+            continue;
+        }
+
+        if (genoRow[i] != 2)
+        {
+            if (hapRow[i] != genoRow[i])
+            {
+                return false;
+            }
+            else
+            {
+                if (pComplement != NULL)
+                {
+                    pComplement->push_back(genoRow[i]);
+                }
+            }
+        }
+        else
+        {
+            if (pComplement != NULL)
+            {
+                if (hapRow[i] == 0)
+                {
+                    pComplement->push_back(1);
+                }
+                else
+                {
+                    pComplement->push_back(0);
+                }
+            }
+        }
+    }
     return true;
-  } else {
-    return false;
-  }
 }
-void ReOrderWithRemovedSites(const vector<int> &posAfterRem,
-                             const vector<int> &removedPos,
-                             vector<int> &posBeforeRemove) {
-  // THis funciton is often used here
-  // For example, we often removed sites from the matrix but then we need to
-  // know their original positions this function consider that by adding the
-  // removed sites back into order (not directly into posBeforeRem) but rather
-  // consider them when adding
-  posBeforeRemove.clear();
-
-  unsigned int pos = 0;
-  for (unsigned int i = 0; i < posAfterRem.size(); ++i) {
-    while (pos < removedPos.size() &&
-           posAfterRem[i] + (int)pos >= removedPos[pos]) {
-      pos++;
-    }
-    posBeforeRemove.push_back(posAfterRem[i] + pos);
-  }
-}
-
-void GetSubsetVec(const vector<int> &vecOriginal, const set<int> &sitesToKeep,
-                  vector<int> &vecNew) {
-  vecNew.clear();
-  for (unsigned int i = 0; i < vecOriginal.size(); ++i) {
-    if (sitesToKeep.find(i) != sitesToKeep.end()) {
-      vecNew.push_back(vecOriginal[i]);
-    }
-  }
-}
-
-void AddMissingVecBits(vector<int> &rowOrig, const set<int> &sitesToAdd,
-                       vector<int> &partialRow) {
-  YW_ASSERT_INFO(sitesToAdd.size() == partialRow.size(),
-                 "Parameter size mismatch");
-
-  // If there is othing to work, stop
-  if (sitesToAdd.size() == 0) {
-    return;
-  }
-
-  cout << "AddMissingVecBits: rowOrig = ";
-  DumpSequence(rowOrig);
-  cout << "Append sites ";
-  DumpIntSet(sitesToAdd);
-  cout << "Missing values = ";
-  DumpIntVec(partialRow);
-  // Here we try to add back some missing sites
-  vector<int> missingSites;
-  PopulateVecBySet(missingSites, sitesToAdd);
-
-  vector<int> res;
-  int posMiss = 0;
-  int posOrig = 0;
-  int curpos = 0;
-
-  while (posMiss < (int)partialRow.size() || posOrig < (int)rowOrig.size()) {
-    // check to see which bit to use and move
-    if (curpos != missingSites[posMiss]) {
-      // This bit is original
-      YW_ASSERT_INFO(posOrig < (int)rowOrig.size(),
-                     "Serious error: not enough bits.");
-      res.push_back(rowOrig[posOrig]);
-      posOrig++;
-    } else {
-      // No this is a missing bit
-      res.push_back(partialRow[posMiss]);
-      posMiss++;
-      ;
-    }
-
-    // now move on
-    curpos++;
-  }
-  rowOrig = res;
-  cout << "AddMissingVecBits: res = ";
-  DumpSequence(rowOrig);
-}
-
-////////////////////////////////////////////////////////////////////////////////////////
-bool IsSequenceHaplotype(const SEQUENCE &seq) {
-  // note need to consider missing value!
-  for (unsigned int i = 0; i < seq.size(); ++i) {
-    if (seq[i] != 0 && seq[i] != 1 && IsMissingValueBit(seq[i]) == false) {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool IsSequenceGenotype(const SEQUENCE &seq) {
-  for (unsigned int i = 0; i < seq.size(); ++i) {
-    if (seq[i] != 0 && seq[i] != 1 && seq[i] != 2 &&
-        IsMissingValueBit(seq[i]) == false) {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool CanPhaseGenoRow(const SEQUENCE &hap1, const SEQUENCE &hap2,
-                     const SEQUENCE &geno) {
-  YW_ASSERT_INFO(IsSequenceHaplotype(hap1), "hap1 is not haplotype row.");
-  YW_ASSERT_INFO(IsSequenceHaplotype(hap2), "hap1 is not haplotype row.");
-  YW_ASSERT_INFO(IsSequenceGenotype(geno), "hap1 is not haplotype row.");
-  YW_ASSERT_INFO(hap1.size() == hap2.size(),
-                 "Tow hap rows are not equal length");
-  YW_ASSERT_INFO(geno.size() == geno.size(),
-                 "Geno row is not the same size as hap row.");
-  // for now, do not allow hap1/hap2 contian missing value
-  YW_ASSERT_INFO(IsSeqHasMV(hap1) == false && IsSeqHasMV(hap2) == false,
-                 "Hap1/Hap2 can not contain missing values");
-  // cout << "hap1 = ";
-  // DumpIntVec(hap1 );
-  // cout << "hap2 = ";
-  // DumpIntVec(hap2 );
-  // cout << "geno = ";
-  // DumpIntVec( geno );
-  for (unsigned int i = 0; i < hap1.size(); i++) {
-    // a missing vlaue can be phased either way
-    if (IsMissingValueBit(geno[i]) == true) {
-      continue;
-    }
-
-    if (geno[i] == 2) {
-      if (hap1[i] + hap2[i] != 1) {
-        return false;
-      }
-    } else {
-      if (hap1[i] + hap2[i] != 2 * geno[i]) {
+
+bool AreHapGenoRowsSame(const SEQUENCE &hapRow, const SEQUENCE &genoRow)
+{
+    YW_ASSERT_INFO(IsSequenceHaplotype(hapRow), "hap is not haplotype row.");
+    YW_ASSERT_INFO(IsSequenceGenotype(genoRow), "genorow is not genotype row.");
+    return AreTwoSeqsCompatible(hapRow, genoRow);
+}
+
+bool IsTrivialRow(const SEQUENCE &row, SEQUENCE &resolved1, SEQUENCE &resolved2)
+{
+    resolved1.clear();
+    resolved2.clear();
+    // A row is trivial if it contains exactly one 2; resolved1/resolved2 get that site as 0/1
+    YW_ASSERT_INFO(IsSequenceGenotype(row), "row is not genotype row.");
+    int num2s = 0;
+    for (unsigned int i = 0; i < row.size(); ++i)
+    {
+        if (row[i] == 2)
+        {
+            ++num2s;
+            if (num2s > 1)
+            {
+                break;
+            }
+        }
+        if (row[i] == 2)
+        {
+            resolved1.push_back(0);
+            resolved2.push_back(1);
+        }
+        else
+        {
+            resolved1.push_back(row[i]);
+            resolved2.push_back(row[i]);
+        }
+    }
+    if (num2s == 1)
+    {
+        // exactly one 2 was seen; note that a row with no 2s is NOT reported as trivial
+        return true;
+    }
+    else
+    {
         return false;
-      }
     }
-  }
-  return true;
 }
 
-bool AreHapGenoRowCompatible(const SEQUENCE &hapRow, const SEQUENCE &genoRow,
-                             SEQUENCE *pComplement) {
-  if (pComplement != NULL) {
-    pComplement->clear();
-  }
+bool IsHapSeqSmaller(const SEQUENCE &hapRow1, const SEQUENCE &hapRow2)
+{
+    // Decide whether hapRow1 is smaller
+    // used in situations when we need to compare two rows
+    YW_ASSERT_INFO(IsSequenceHaplotype(hapRow1), "hap1 is not haplotype row.");
+    YW_ASSERT_INFO(IsSequenceHaplotype(hapRow2), "hap2 is not haplotype row.");
+    YW_ASSERT_INFO(hapRow1.size() == hapRow2.size(), "Tow hap rows are not equal length");
+    // do not handle MV in this function
+    YW_ASSERT_INFO(IsSeqHasMV(hapRow1) == false && IsSeqHasMV(hapRow2) == false, "Can not handle MV here");
 
-  // Check if the haplotype row can be a phasing of the geno row
-  YW_ASSERT_INFO(IsSequenceHaplotype(hapRow), "hap is not haplotype row.");
-  YW_ASSERT_INFO(IsSequenceGenotype(genoRow), "genorow is not haplotype row.");
-  for (unsigned int i = 0; i < hapRow.size(); i++) {
-    // if either one is missing value, they match!
-    if (IsMissingValueBit(genoRow[i]) == true ||
-        IsMissingValueBit(hapRow[i]) == true) {
-      continue;
+    for (unsigned int i = 0; i < hapRow1.size(); ++i)
+    {
+        if (hapRow1[i] < hapRow2[i])
+        {
+            return true;
+        }
     }
-
-    if (genoRow[i] != 2) {
-      if (hapRow[i] != genoRow[i]) {
-        return false;
-      } else {
-        if (pComplement != NULL) {
-          pComplement->push_back(genoRow[i]);
-        }
-      }
-    } else {
-      if (pComplement != NULL) {
-        if (hapRow[i] == 0) {
-          pComplement->push_back(1);
-        } else {
-          pComplement->push_back(0);
-        }
-      }
-    }
-  }
-  return true;
-}
-
-bool AreHapGenoRowsSame(const SEQUENCE &hapRow, const SEQUENCE &genoRow) {
-  YW_ASSERT_INFO(IsSequenceHaplotype(hapRow), "hap is not haplotype row.");
-  YW_ASSERT_INFO(IsSequenceGenotype(genoRow), "genorow is not haplotype row.");
-  return AreTwoSeqsCompatible(hapRow, genoRow);
-}
-
-bool IsTrivialRow(const SEQUENCE &row, SEQUENCE &resolved1,
-                  SEQUENCE &resolved2) {
-  resolved1.clear();
-  resolved2.clear();
-  // A row is trivial if it contains only a single 2
-  YW_ASSERT_INFO(IsSequenceGenotype(row), "hap is not haplotype row.");
-  int num2s = 0;
-  for (unsigned int i = 0; i < row.size(); ++i) {
-    if (row[i] == 2) {
-      ++num2s;
-      if (num2s > 1) {
-        break;
-      }
-    }
-    if (row[i] == 2) {
-      resolved1.push_back(0);
-      resolved2.push_back(1);
-    } else {
-      resolved1.push_back(row[i]);
-      resolved2.push_back(row[i]);
-    }
-  }
-  if (num2s == 1) {
-    // For now, we do not consider a row with no twos as new
-    return true;
-  } else {
     return false;
-  }
-}
-
-bool IsHapSeqSmaller(const SEQUENCE &hapRow1, const SEQUENCE &hapRow2) {
-  // Decide whether hapRow1 is smaller
-  // used in situations when we need to compare two rows
-  YW_ASSERT_INFO(IsSequenceHaplotype(hapRow1), "hap1 is not haplotype row.");
-  YW_ASSERT_INFO(IsSequenceHaplotype(hapRow2), "hap2 is not haplotype row.");
-  YW_ASSERT_INFO(hapRow1.size() == hapRow2.size(),
-                 "Tow hap rows are not equal length");
-  // do not handle MV in this function
-  YW_ASSERT_INFO(IsSeqHasMV(hapRow1) == false && IsSeqHasMV(hapRow2) == false,
-                 "Can not handle MV here");
-
-  for (unsigned int i = 0; i < hapRow1.size(); ++i) {
-    if (hapRow1[i] < hapRow2[i]) {
-      return true;
-    }
-  }
-  return false;
-}
-
-void GetHyperCubeSeq(int hcSeq, SEQUENCE &seq, int hcWidth) {
-  ConvIntToVecMSB(hcSeq, seq, hcWidth);
-}
-
-int GetSeqIdFromSeq(const SEQUENCE &seq) {
-  // do not support MV
-  YW_ASSERT_INFO(IsSeqHasMV(seq) == false, "Can not support MV");
-  return ConvVecToIntMSB(seq);
-}
-
-int GetHyperCubSeqBit(int hcSeq, int bit, int hcWidth) {
-  // Retrive the bit in the hcSeq at the specified bit
-  // But  note that we are assuming bit 0 is on the left (BIG ENDIAN)
-  int shiftpos = hcWidth - bit - 1;
-  int mask = 0x1 << shiftpos;
-  int res = (hcSeq & mask) >> shiftpos;
-  YW_ASSERT_INFO(res == 0 || res == 1, "Serious error here.");
-  return res;
-}
-
-void FindNonSegSites(const set<HCSequence> &setSeqs, set<int> &sites,
-                     int dataWidth) {
-  // Now we find out whe mutation sites that are not used in the setSeqs
-  // Find the set of sites that are not segragating in the set of sequences
-  for (int i = 0; i < dataWidth; ++i) {
-    bool fZero = false, fOne = false;
-    // Check to see if site i is segragating or not
-    for (set<int>::iterator it = setSeqs.begin(); it != setSeqs.end(); ++it) {
-      int rn = *it;
-      // cout <<"In FindNonSegragateSites: rn = " << rn << endl;
-      if (GetHyperCubSeqBit(rn, i, dataWidth) == 0) {
-        fZero = true;
-      } else {
-        fOne = true;
-      }
-      if (fZero && fOne) {
-        break;
-      }
-    }
-    if (fZero == false || fOne == false) {
-      sites.insert(i);
-    }
-  }
-}
-
-void FindNonSegSites(const set<SEQUENCE> &setSeqs, set<int> &sites,
-                     int dataLen) {
-  sites.clear();
-  if (setSeqs.size() == 0) {
-    // Every one is non-segragating
-    for (int i = 0; i < dataLen; ++i) {
-      sites.insert(i);
-    }
-    return;
-  }
-
-  for (int i = 0; i < dataLen; ++i) {
-    bool fZero = false, fOne = false;
-    // Check to see if site i is segragating or not
-    for (set<SEQUENCE>::iterator it = setSeqs.begin(); it != setSeqs.end();
-         ++it) {
-      SEQUENCE row = *it;
-      YW_ASSERT_INFO(IsSequenceHaplotype(row),
-                     "This function only works for haplotype");
-      // cout <<"In FindNonSegragateSites: rn = " << rn << endl;
-      if (row[i] == 0) {
-        fZero = true;
-      } else if (row[i] == 1) {
-        fOne = true;
-      }
-      if (fZero && fOne) {
-        break;
-      }
-    }
-    if (fZero == false || fOne == false) {
-      sites.insert(i);
-    }
-  }
-}
-
-void CreateGenoRowFromHapRows(const SEQUENCE &hapRow1, const SEQUENCE &hapRow2,
-                              SEQUENCE &genoRow) {
-  // Check if the haplotype row can be a phasing of the geno row
-  YW_ASSERT_INFO(IsSequenceHaplotype(hapRow1), "hap1 is not haplotype row.");
-  YW_ASSERT_INFO(IsSequenceHaplotype(hapRow2), "hap2 is not haplotype row.");
-  // do not allow missing vlaues
-  YW_ASSERT_INFO(IsSeqHasMV(hapRow1) == false && IsSeqHasMV(hapRow2) == false,
-                 "Can not handle MV");
-  genoRow.clear();
-  for (unsigned int i = 0; i < hapRow1.size(); i++) {
-    if (hapRow1[i] == hapRow2[i]) {
-      genoRow.push_back(hapRow1[i]);
-    } else {
-      genoRow.push_back(2);
-    }
-  }
-}
-
-int IsHCSeqsMutPair(HCSequence seq1, HCSequence seq2, int dataWidth) {
-  // This function test if seq1/seq2 is mutation pair, and return the mut site
-  // if so
-  for (int p = 0; p < dataWidth; p++) {
-    int shiftpos = dataWidth - p - 1;
+}
+
+void GetHyperCubeSeq(int hcSeq, SEQUENCE &seq, int hcWidth)
+{ // Thin wrapper: delegates the hcSeq -> seq conversion to ConvIntToVecMSB.
+    ConvIntToVecMSB(hcSeq, seq, hcWidth); // presumably most-significant bit first, per the helper's name -- confirm
+}
+
+int GetSeqIdFromSeq(const SEQUENCE &seq)
+{
+    // Missing values (MV) are not supported: assert that seq has none before converting
+    YW_ASSERT_INFO(IsSeqHasMV(seq) == false, "Can not support MV");
+    return ConvVecToIntMSB(seq); // integer id produced by ConvVecToIntMSB (presumably MSB-first packing -- confirm)
+}
+
+int GetHyperCubSeqBit(int hcSeq, int bit, int hcWidth)
+{
+    // Retrieve the bit of hcSeq at position `bit`; the result is asserted to be 0 or 1.
+    // Note: position 0 is the leftmost (most significant) of the hcWidth bits.
+    int shiftpos = hcWidth - bit - 1;
     int mask = 0x1 << shiftpos;
+    int res = (hcSeq & mask) >> shiftpos;
+    YW_ASSERT_INFO(res == 0 || res == 1, "Serious error here.");
+    return res;
+}
+
+void FindNonSegSites(const set<HCSequence> &setSeqs, set<int> &sites, int dataWidth)
+{
+    // Collect into `sites` every site index in [0, dataWidth) that is non-segregating,
+    // i.e. where 0 and 1 do not both occur among setSeqs. Note: `sites` is not cleared first.
+    for (int i = 0; i < dataWidth; ++i)
+    {
+        bool fZero = false, fOne = false;
+        // Check whether site i is segregating (both a 0 and a 1 are seen)
+        for (set<int>::iterator it = setSeqs.begin(); it != setSeqs.end(); ++it)
+        {
+            int rn = *it;
+            //cout <<"In FindNonSegragateSites: rn = " << rn << endl;
+            if (GetHyperCubSeqBit(rn, i, dataWidth) == 0)
+            {
+                fZero = true;
+            }
+            else
+            {
+                fOne = true;
+            }
+            if (fZero && fOne)
+            {
+                break;
+            }
+        }
+        if (fZero == false || fOne == false)
+        {
+            sites.insert(i);
+        }
+    }
+}
 
-    if ((seq1 | mask) == (seq2 | mask)) {
-      return p;
+void FindNonSegSites(const set<SEQUENCE> &setSeqs, set<int> &sites, int dataLen)
+{
+    sites.clear();
+    if (setSeqs.size() == 0)
+    {
+        // Every one is non-segragating
+        for (int i = 0; i < dataLen; ++i)
+        {
+            sites.insert(i);
+        }
+        return;
     }
-  }
 
-  return -1; // indicate NOT-pair
+    for (int i = 0; i < dataLen; ++i)
+    {
+        bool fZero = false, fOne = false;
+        // Check to see if site i is segragating or not
+        for (set<SEQUENCE>::iterator it = setSeqs.begin(); it != setSeqs.end(); ++it)
+        {
+            SEQUENCE row = *it;
+            YW_ASSERT_INFO(IsSequenceHaplotype(row), "This function only works for haplotype");
+            //cout <<"In FindNonSegragateSites: rn = " << rn << endl;
+            if (row[i] == 0)
+            {
+                fZero = true;
+            }
+            else if (row[i] == 1)
+            {
+                fOne = true;
+            }
+            if (fZero && fOne)
+            {
+                break;
+            }
+        }
+        if (fZero == false || fOne == false)
+        {
+            sites.insert(i);
+        }
+    }
 }
 
-bool IsHCSeqsMutPairAt(HCSequence seq1, HCSequence seq2, int dataWidth,
-                       int pos) {
-  // Different from the previous function, this check for a specific location
-  // instead of trying all possible positions
-  int shiftpos = dataWidth - pos - 1;
-  int mask = 0x1 << shiftpos;
-
-  if ((seq1 | mask) == (seq2 | mask)) {
-    return true;
-  }
-  return false;
+void CreateGenoRowFromHapRows(const SEQUENCE &hapRow1, const SEQUENCE &hapRow2, SEQUENCE &genoRow)
+{
+    // Build in genoRow the genotype row formed by the two haplotype rows
+    YW_ASSERT_INFO(IsSequenceHaplotype(hapRow1), "hap1 is not haplotype row.");
+    YW_ASSERT_INFO(IsSequenceHaplotype(hapRow2), "hap2 is not haplotype row.");
+    // missing values are not allowed in either input
+    YW_ASSERT_INFO(IsSeqHasMV(hapRow1) == false && IsSeqHasMV(hapRow2) == false, "Can not handle MV");
+    genoRow.clear();
+    for (unsigned int i = 0; i < hapRow1.size(); i++) // NOTE(review): hapRow2 is indexed up to hapRow1.size(); equal lengths are not asserted here
+    {
+        if (hapRow1[i] == hapRow2[i])
+        {
+            genoRow.push_back(hapRow1[i]); // both rows agree: keep the shared value
+        }
+        else
+        {
+            genoRow.push_back(2); // rows differ at this site: encode as 2
+        }
+    }
 }
 
-void MutateHCSeqAt(const HCSequence seq, HCSequence &res, int dataWidth,
-                   int mutPos) {
-  int shiftpos = dataWidth - mutPos - 1;
-  int mask = 0x1 << shiftpos;
+int IsHCSeqsMutPair(HCSequence seq1, HCSequence seq2, int dataWidth)
+{
+    // Test whether seq1/seq2 form a mutation pair; return the mutation site if so, -1 otherwise
+    for (int p = 0; p < dataWidth; p++)
+    {
+        int shiftpos = dataWidth - p - 1;
+        int mask = 0x1 << shiftpos;
+
+        if ((seq1 | mask) == (seq2 | mask)) // equal once the bit at site p is forced to 1; also holds when seq1 == seq2
+        {
+            return p;
+        }
+    }
+
+    return -1; // indicate NOT-pair
 }
 
-bool IsHCSeqRecombinnable(HCSequence hcSeq1, HCSequence hcSeq2, HCSequence st,
-                          int dataWidth) {
-  // ASSUME: s1 is LEFT part and s2 is RIGHT part
-  // This function test if s1 and s2 can recombine into st
-  // Now start recombining
-  for (int bkpt = 0; bkpt < dataWidth - 1; ++bkpt) {
-    unsigned int maskLower = (0x1 << (bkpt + 1)) - 1;
-    unsigned int maskUpper = (0x1 << dataWidth) - 1 - maskLower;
+bool IsHCSeqsMutPairAt(HCSequence seq1, HCSequence seq2, int dataWidth, int pos)
+{
+    // Different from the previous function, this check for a specific location
+    // instead of trying all possible positions
+    int shiftpos = dataWidth - pos - 1;
+    int mask = 0x1 << shiftpos;
 
-    // Generate s sequence
-    int seq1 = ((hcSeq1 & maskLower) | (hcSeq2 & maskUpper));
-    if (seq1 == st) {
-      return true;
+    if ((seq1 | mask) == (seq2 | mask))
+    {
+        return true;
     }
+    return false;
+}
+
+void MutateHCSeqAt(const HCSequence seq, HCSequence &res, int dataWidth, int mutPos)
+{ // Write to res a copy of seq with the bit at position mutPos flipped.
+    int shiftpos = dataWidth - mutPos - 1; // position 0 maps to the highest of the dataWidth bits
+    int mask = 0x1 << shiftpos;
+
+    res = (seq ^ mask); // XOR toggles exactly the masked bit
+}
+
+bool IsHCSeqRecombinnable(HCSequence hcSeq1, HCSequence hcSeq2, HCSequence st, int dataWidth)
+{
+    // ASSUME: s1 is LEFT part and s2 is RIGHT part
+    // This function test if s1 and s2 can recombine into st
+    // Now start recombining
+    for (int bkpt = 0; bkpt < dataWidth - 1; ++bkpt)
+    {
+        unsigned int maskLower = (0x1 << (bkpt + 1)) - 1;
+        unsigned int maskUpper = (0x1 << dataWidth) - 1 - maskLower;
+
+        // Generate s sequence
+        int seq1 = ((hcSeq1 & maskLower) | (hcSeq2 & maskUpper));
+        if (seq1 == st)
+        {
+            return true;
+        }
 #if 0 // here we
         int seq2 = ((hcSeq1 & maskUpper)  | (hcSeq2 & maskLower) );
         if( seq2 == st)
@@ -638,19 +770,19 @@ bool IsHCSeqRecombinnable(HCSequence hcSeq1, HCSequence hcSeq2, HCSequence st,
             return true;
         }
 #endif
-  }
+    }
 
-  return false;
+    return false;
 }
 
-void RecombineHCSeqs(const HCSequence hcSeq1, const HCSequence hcSeq2,
-                     HCSequence &res, int dataWidth, int bkpt) {
-  // ASSUME: s1 is LEFT part and s2 is RIGHT part
-  // This function test if s1 and s2 can recombine into st
-  // Now start recombining
-  unsigned int maskLower = (0x1 << (bkpt + 1)) - 1;
-  unsigned int maskUpper = (0x1 << dataWidth) - 1 - maskLower;
+void RecombineHCSeqs(const HCSequence hcSeq1, const HCSequence hcSeq2, HCSequence &res, int dataWidth, int bkpt)
+{
+    // ASSUME: s1 is LEFT part and s2 is RIGHT part
+    // This function test if s1 and s2 can recombine into st
+    // Now start recombining
+    unsigned int maskLower = (0x1 << (bkpt + 1)) - 1;
+    unsigned int maskUpper = (0x1 << dataWidth) - 1 - maskLower;
 
-  // Generate s sequence
-  res = ((hcSeq1 & maskLower) | (hcSeq2 & maskUpper));
+    // Generate s sequence
+    res = ((hcSeq1 & maskLower) | (hcSeq2 & maskUpper));
 }
diff --git a/trisicell/external/scistree/Utils2.h b/trisicell/external/scistree/Utils2.h
index 4dc5e35..cc9a767 100644
--- a/trisicell/external/scistree/Utils2.h
+++ b/trisicell/external/scistree/Utils2.h
@@ -1,23 +1,23 @@
 #ifndef UTILS2_H
 #define UTILS2_H
 
-#include <cstdio>
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
 #include <list>
-#include <map>
+#include <vector>
 #include <set>
+#include <map>
 #include <string>
-#include <vector>
+#include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
 //#include <limits>
 using namespace std;
 
-#include "Utils.h"
-#include <ctime>
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
+#include <ctime>
+#include "Utils.h"
 
 // This file contains some extra utilties that are frequently used
 
@@ -31,49 +31,39 @@ double GetElapseCPUTime(const std::clock_t &tmStart);
 bool IsBoolArrayAllTrue(bool *bArray, int size);
 void AppendIntVec(vector<int> &dest, const vector<int> &appending);
 bool IsSetContainer(const set<int> &contianer, const set<int> &contained);
-bool IsSetContainedInSets(const set<int> &s, const set<set<int> > &sets);
-bool IsSetContainingOneOfSets(const set<int> &s, const set<set<int> > &sets);
+bool IsSetContainedInSets(const set<int> &s, const set<set<int>> &sets);
+bool IsSetContainingOneOfSets(const set<int> &s, const set<set<int>> &sets);
 void ConcatIntVec(vector<int> &vecAdded, const vector<int> &vecToAdd);
 int ConvIntSetToPosition(const set<int> &s);
 void ConvPositionToIntSet(int val, set<int> &s);
 void PopulateSetWithInterval(set<int> &s, int left, int right);
 void GetSeqInterval(const SEQUENCE &row, SEQUENCE &rowIV, int left, int right);
-bool IsIntervalContained(const set<SEQUENCE> &seqs, int left, int right,
-                         const SEQUENCE &seqIV);
+bool IsIntervalContained(const set<SEQUENCE> &seqs, int left, int right, const SEQUENCE &seqIV);
 void SubtractSequenceSets(set<SEQUENCE> &s1, const set<SEQUENCE> &s2);
 void DumpSequence(const SEQUENCE &seq);
 void DumpVecSequences(const vector<SEQUENCE> &setSeqs);
 void DumpSetSequences(const set<SEQUENCE> &setSeqs);
-bool AreTwoInSameSet(int i1, int i2, const set<set<int> > &collections);
+bool AreTwoInSameSet(int i1, int i2, const set<set<int>> &collections);
 int GetItemIndexInVec(const vector<int> &vec, int item);
 bool IsIntervalOverlap(const INTERVAL &iv1, const INTERVAL &iv2);
-bool GetIntervalOverlap(const INTERVAL &iv1, const INTERVAL &iv2,
-                        INTERVAL &ivBoth);
+bool GetIntervalOverlap(const INTERVAL &iv1, const INTERVAL &iv2, INTERVAL &ivBoth);
 void GenerateRandBinVector(int sz, vector<int> &randVec);
 bool IsBinary(int val);
-void ReOrderWithRemovedSites(const vector<int> &posAfterRem,
-                             const vector<int> &removedPos,
-                             vector<int> &posBeforeRemove);
-void GetSubsetVec(const vector<int> &vecOriginal, const set<int> &sitesToKeep,
-                  vector<int> &vecNew);
-void AddMissingVecBits(vector<int> &rowComplete, const set<int> &sitesToAdd,
-                       vector<int> &partialRow);
+void ReOrderWithRemovedSites(const vector<int> &posAfterRem, const vector<int> &removedPos, vector<int> &posBeforeRemove);
+void GetSubsetVec(const vector<int> &vecOriginal, const set<int> &sitesToKeep, vector<int> &vecNew);
+void AddMissingVecBits(vector<int> &rowComplete, const set<int> &sitesToAdd, vector<int> &partialRow);
 
 // ***************************************************************************
 // Utilies for phasing
 // ***************************************************************************
 bool IsSequenceHaplotype(const SEQUENCE &seq);
 bool IsSequenceGenotype(const SEQUENCE &seq);
-bool CanPhaseGenoRow(const SEQUENCE &hap1, const SEQUENCE &hap2,
-                     const SEQUENCE &geno);
-bool AreHapGenoRowCompatible(const SEQUENCE &hapRow, const SEQUENCE &genoRow,
-                             SEQUENCE *pComplement = NULL);
+bool CanPhaseGenoRow(const SEQUENCE &hap1, const SEQUENCE &hap2, const SEQUENCE &geno);
+bool AreHapGenoRowCompatible(const SEQUENCE &hapRow, const SEQUENCE &genoRow, SEQUENCE *pComplement = NULL);
 bool AreHapGenoRowsSame(const SEQUENCE &hapRow, const SEQUENCE &genoRow);
-bool IsTrivialRow(const SEQUENCE &row, SEQUENCE &resolved1,
-                  SEQUENCE &resolved2);
+bool IsTrivialRow(const SEQUENCE &row, SEQUENCE &resolved1, SEQUENCE &resolved2);
 bool IsHapSeqSmaller(const SEQUENCE &hapRow1, const SEQUENCE &hapRow2);
-void CreateGenoRowFromHapRows(const SEQUENCE &hapRow1, const SEQUENCE &hapRow2,
-                              SEQUENCE &genoRow);
+void CreateGenoRowFromHapRows(const SEQUENCE &hapRow1, const SEQUENCE &hapRow2, SEQUENCE &genoRow);
 
 // ***************************************************************************
 // Utilies for hypercube related stuff
@@ -84,18 +74,12 @@ typedef int HCSequence;
 void GetHyperCubeSeq(int hcSeq, SEQUENCE &seq, int hcWidth);
 int GetHyperCubSeqBit(int hcSeq, int bit, int hcWidth);
 int GetSeqIdFromSeq(const SEQUENCE &seq);
-void FindNonSegSites(const set<HCSequence> &setSeqs, set<int> &sites,
-                     int dataWidth);
-void FindNonSegSites(const set<SEQUENCE> &setSeqs, set<int> &sites,
-                     int dataLen);
+void FindNonSegSites(const set<HCSequence> &setSeqs, set<int> &sites, int dataWidth);
+void FindNonSegSites(const set<SEQUENCE> &setSeqs, set<int> &sites, int dataLen);
 int IsHCSeqsMutPair(HCSequence seq1, HCSequence seq2, int dataWidth);
-bool IsHCSeqsMutPairAt(HCSequence seq1, HCSequence seq2, int dataWidth,
-                       int pos);
-void MutateHCSeqAt(const HCSequence seq, HCSequence &res, int dataWidth,
-                   int mutPos);
-bool IsHCSeqRecombinnable(HCSequence s1, HCSequence s2, HCSequence st,
-                          int dataWidth);
-void RecombineHCSeqs(const HCSequence hcSeq1, const HCSequence hcSeq2,
-                     HCSequence &res, int dataWidth, int bkpt);
+bool IsHCSeqsMutPairAt(HCSequence seq1, HCSequence seq2, int dataWidth, int pos);
+void MutateHCSeqAt(const HCSequence seq, HCSequence &res, int dataWidth, int mutPos);
+bool IsHCSeqRecombinnable(HCSequence s1, HCSequence s2, HCSequence st, int dataWidth);
+void RecombineHCSeqs(const HCSequence hcSeq1, const HCSequence hcSeq2, HCSequence &res, int dataWidth, int bkpt);
 
-#endif // UTILS2_H
+#endif //UTILS2_H
diff --git a/trisicell/external/scistree/Utils3.cpp b/trisicell/external/scistree/Utils3.cpp
index 01016a3..5dca452 100644
--- a/trisicell/external/scistree/Utils3.cpp
+++ b/trisicell/external/scistree/Utils3.cpp
@@ -1,499 +1,623 @@
-#include "Utils3.h"
 #include <algorithm>
-#include <cmath>
-#include <cstdio>
+#include "Utils3.h"
+#include <ctime>
 #include <cstdlib>
+#include <cstdio>
 #include <cstring>
-#include <ctime>
+#include <cmath>
 
-//////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
 // HashTable functions
-//////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
 
-YWHashItem ::~YWHashItem() {}
+YWHashItem :: ~YWHashItem()
+{
+}
 
-YWHashTable ::YWHashTable(int nB) : numBuckets(nB) {}
 
-YWHashTable ::~YWHashTable() {
-  // NOTE: has to free memory here
-  for (unsigned int i = 0; i < hashTable.size(); ++i) {
-    delete hashTable[i];
-  }
-  hashTable.clear();
+YWHashTable :: YWHashTable( int nB ) : numBuckets( nB)
+{
 }
 
-void YWHashTable ::AddItem(YWHashItem *pItem) { hashTable.push_back(pItem); }
-
-YWHashItem *YWHashTable ::GetIdenticalItem(YWHashItem *pItem) {
-  cout << "GetIdenticalItem: key = " << pItem->Key() << endl;
-  for (unsigned int i = 0; i < hashTable.size(); ++i) {
-    // cout << "We are here.\n";
-    YW_ASSERT_INFO(hashTable[i] != NULL, "Can not be nothing here.");
-    if (*hashTable[i] == *pItem) {
-      cout << "find it here.\n";
-      return hashTable[i];
+YWHashTable :: ~YWHashTable()
+{
+    // NOTE: has to free memory here
+    for( unsigned int i=0; i<hashTable.size(); ++i)
+    {
+        delete hashTable[i];
     }
-  }
-  cout << "did not find.\n";
-  return NULL;
+    hashTable.clear();
 }
 
-YWHashItem *YWHashTable ::GetFirstItem() {
-  cout << "GetFirstItem: size = " << hashTable.size() << endl;
-  if (hashTable.size() == 0) {
-    return NULL;
-  }
-  this->curPos = 0;
-  return hashTable[0];
+
+void YWHashTable :: AddItem( YWHashItem *pItem )
+{
+    hashTable.push_back( pItem );
 }
 
-YWHashItem *YWHashTable ::GetNextItem() {
-  cout << "GetNextItem: size = " << hashTable.size();
-  cout << ", curPos = " << curPos << endl;
-  if (this->curPos + 1 >= (int)hashTable.size()) {
-    cout << "No more item.\n";
+YWHashItem* YWHashTable :: GetIdenticalItem( YWHashItem *pItem )
+{
+cout << "GetIdenticalItem: key = " << pItem->Key() << endl;
+    for( unsigned int i=0; i<hashTable.size(); ++i)
+    {
+//cout << "We are here.\n";
+        YW_ASSERT_INFO( hashTable[i] != NULL, "Can not be nothing here." );
+        if( *hashTable[i]  == *pItem )
+        {
+cout << "find it here.\n";
+            return hashTable[i];
+        }
+    }
+cout << "did not find.\n";
     return NULL;
-  }
-  this->curPos++;
-  YWHashItem *pItem = hashTable[this->curPos];
-  YW_ASSERT_INFO(pItem != NULL, "Can not be nothing.");
-  cout << "GetNextItem.key() = " << pItem->Key() << endl;
-  return pItem;
 }
 
-int YWHashTable ::GetTotalItemNum() const { return hashTable.size(); }
+YWHashItem *  YWHashTable :: GetFirstItem()
+{ // Return the first stored item and reset the cursor to it; NULL when the table is empty.
+cout << "GetFirstItem: size = " << hashTable.size() << endl; // trace output written to stdout on every call
+    if( hashTable.size() == 0 )
+    {
+        return NULL; // curPos is left unchanged in this case
+    }
+    this->curPos = 0;
+    return hashTable[0];
+}
 
-void YWHashTable ::Dump() const {
-  for (unsigned int i = 0; i < hashTable.size(); ++i) {
-    // cout << "We are here.\n";
-    cout << "Key for item " << i << " = " << hashTable[i]->Key() << endl;
-  }
+YWHashItem * YWHashTable :: GetNextItem()
+{ // Advance the cursor by one and return the item there; NULL when the cursor is already at the last item.
+cout << "GetNextItem: size = " << hashTable.size() ; // trace output written to stdout on every call
+cout << ", curPos = " << curPos << endl;
+    if( this->curPos + 1>= (int)hashTable.size() )
+    {
+cout << "No more item.\n";
+        return NULL; // cursor is not advanced past the end
+    }
+    this->curPos ++;
+    YWHashItem *pItem = hashTable[ this->curPos ];
+    YW_ASSERT_INFO( pItem != NULL, "Can not be nothing." ); // stored entries are expected to be non-NULL
+cout << "GetNextItem.key() = " << pItem->Key() << endl;
+    return pItem;
 }
 
+int YWHashTable :: GetTotalItemNum() const
+{ // Number of items currently stored in the table.
+    return hashTable.size(); // size() result is converted to int by the return type
+}
+
+void YWHashTable :: Dump() const
+{ // Print the key of every stored item to stdout, one line per item, in storage order.
+    for( unsigned int i=0; i<hashTable.size(); ++i)
+    {
+//cout << "We are here.\n";
+        cout << "Key for item " << i << " = " << hashTable[i]->Key() << endl; // entries are dereferenced without a NULL check
+    }
+}
+
+
 //
-bool SequenceCmp ::operator()(const SEQUENCE &seq1,
-                              const SEQUENCE &seq2) const {
-  if (seq1.size() != seq2.size()) {
-    DumpSequence(seq1);
-    DumpSequence(seq2);
-  }
+bool SequenceCmp :: operator() ( const SEQUENCE &seq1, const SEQUENCE &seq2 ) const
+{
+    if( seq1.size() != seq2.size() )
+    {
+        DumpSequence( seq1);
+        DumpSequence( seq2);
+    }
 
-  YW_ASSERT_INFO(seq1.size() == seq2.size(),
-                 "Can not compare two things with different length");
+    YW_ASSERT_INFO( seq1.size() == seq2.size(), "Can not compare two things with different length" );
 
-  for (int i = 0; i < (int)seq1.size(); ++i) {
-    if (seq1[i] < seq2[i]) {
-      return true;
-    } else if (seq1[i] > seq2[i]) {
-      return false;
+    for( int i=0; i<(int)seq1.size(); ++i )
+    {
+        if( seq1[i] < seq2[i] )
+        {
+            return true;
+        }
+        else if( seq1[i] > seq2[i] )
+        {
+            return false;
+        }
     }
-  }
 
-  // if all are equal
-  return false;
+    // if all are equal
+    return false;
 }
 
-//////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////////////////////////////////////
 // substring functions
-//////////////////////////////////////////////////////////////////////////////
-bool IsIntervalContained(const INTERVAL &iv1, const INTERVAL &iv2) {
-  if ((iv1.first >= iv2.first && iv1.second <= iv2.second) ||
-      (iv2.first >= iv1.first && iv2.second <= iv1.second)) {
-    return true;
-  }
-  return false;
+/////////////////////////////////////////////////////////////////////////////////////////
+bool IsIntervalContained( const INTERVAL &iv1, const INTERVAL &iv2)
+{ // True when either interval lies entirely within the other; the test is symmetric in iv1 and iv2.
+    if( (iv1.first  >= iv2.first && iv1.second <= iv2.second)
+        ||  (iv2.first  >= iv1.first && iv2.second <= iv1.second)     )
+    {
+        return true;
+    }
+    return false;
+}
+int GetIntervalLen( const INTERVAL &iv)
+{ // Length of the interval counting both endpoints (second - first + 1).
+    return iv.second - iv.first + 1;
 }
-int GetIntervalLen(const INTERVAL &iv) { return iv.second - iv.first + 1; }
 
-int GetRandItemInSet(const set<int> &items) {
-  vector<int> itemsVec;
-  PopulateVecBySet(itemsVec, items);
-  return GetRandItemInVec(itemsVec);
+int GetRandItemInSet( const set<int> &items )
+{ // Pick a random element of items by going through a temporary vector.
+    vector<int> itemsVec;
+    PopulateVecBySet( itemsVec, items ); // presumably copies the set's elements into itemsVec -- confirm against the helper
+    return GetRandItemInVec(itemsVec); // the vector sampler asserts on empty input
 }
 
-int GetRandItemInVec(const vector<int> &items) {
-  YW_ASSERT_INFO(items.size() > 0, "You can not sample from an empty set");
+int GetRandItemInVec( const vector<int> &items )
+{
+    YW_ASSERT_INFO( items.size() >0, "You can not sample from an empty set" );
 
-  double frac = GetRandFraction();
-  return items[(int)(items.size() * frac)];
+    double frac = GetRandFraction();
+    return items[ (int)(items.size() * frac) ];
 }
 
-void GetRandVector(vector<int> &rndVec, int start, int end) {
-  set<int> itemsNotUsed;
-  PopulateSetWithInterval(itemsNotUsed, start, end);
-  while (itemsNotUsed.size() > 0) {
-    int itemRnd = GetRandItemInSet(itemsNotUsed);
-    rndVec.push_back(itemRnd);
-    itemsNotUsed.erase(itemRnd);
-  }
+void GetRandVector( vector<int> &rndVec, int start, int end )
+{ // Append to rndVec (which is not cleared) the items of the start..end pool in random order.
+    set<int> itemsNotUsed;
+    PopulateSetWithInterval( itemsNotUsed, start, end); // presumably fills the set with the integers of the interval -- confirm endpoint inclusivity
+    while( itemsNotUsed.size() > 0 )
+    {
+        int itemRnd = GetRandItemInSet( itemsNotUsed ); // draw one remaining item
+        rndVec.push_back( itemRnd );
+        itemsNotUsed.erase( itemRnd ); // remove it so each item is emitted exactly once
+    }
 }
 
-int GetWeightedRandItemInVec(const vector<int> &items,
-                             const vector<double> &itemWeights) {
-  // cout << "items = ";
-  // DumpIntVec( items );
 
-  YW_ASSERT_INFO(items.size() == itemWeights.size(), "Size mismatch");
-  double accum = 0.0;
-  for (unsigned int i = 0; i < itemWeights.size(); ++i) {
-    // cout << "one weight = " << itemWeights[i] << endl;
-    accum += itemWeights[i];
-  }
-  YW_ASSERT_INFO(accum > 0.0000001, "2.Can not be too small");
-  double frac = GetRandFraction();
-  double curFract = 0.0;
-  for (unsigned int i = 0; i < itemWeights.size(); ++i) {
-    curFract += itemWeights[i] / accum;
-    if (curFract >= frac) {
-      return items[i];
+int GetWeightedRandItemInVec( const vector<int> &items, const vector<double> &itemWeights )
+{
+//cout << "items = ";
+//DumpIntVec( items );
+
+    YW_ASSERT_INFO( items.size() == itemWeights.size(), "Size mismatch" );
+    double accum = 0.0;
+    for( unsigned int i=0; i<itemWeights.size(); ++i )
+    {
+//cout << "one weight = " << itemWeights[i] << endl;
+        accum += itemWeights[i];
     }
-  }
-  return -1; // should nothappen
+    YW_ASSERT_INFO( accum > 0.0000001, "2.Can not be too small" );
+    double frac = GetRandFraction();
+    double curFract = 0.0;
+    for( unsigned int i=0; i<itemWeights.size(); ++i )
+    {
+        curFract += itemWeights[i]/accum;
+        if( curFract >= frac )
+        {
+            return items[i];
+        }
+    }
+    return -1;      // should nothappen
+
+
 }
 
 // This functionreturn a weighted uniformly item index from the list
-int GetWeightedRandItemIndex(const vector<double> &itemWeights) {
-  double accum = 0.0;
-  for (unsigned int i = 0; i < itemWeights.size(); ++i) {
-    // cout << "one weight = " << itemWeights[i] << endl;
-    accum += itemWeights[i];
-  }
-  // YW_ASSERT_INFO( accum > 0.0000001, "3. Can not be too small" );
-  double frac = GetRandFraction();
-  double curFract = 0.0;
-  for (unsigned int i = 0; i < itemWeights.size(); ++i) {
-    curFract += itemWeights[i] / accum;
-    if (curFract >= frac) {
-      return i;
+int GetWeightedRandItemIndex( const vector<double> &itemWeights )
+{
+    double accum = 0.0;
+    for( unsigned int i=0; i<itemWeights.size(); ++i )
+    {
+//cout << "one weight = " << itemWeights[i] << endl;
+        accum += itemWeights[i];
     }
-  }
-  // Can not come here
-  YW_ASSERT_INFO(false, "Something wrong here");
-  return -1; // should nothappen
+    //YW_ASSERT_INFO( accum > 0.0000001, "3. Can not be too small" );
+    double frac = GetRandFraction();
+    double curFract = 0.0;
+    for( unsigned int i=0; i<itemWeights.size(); ++i )
+    {
+        curFract += itemWeights[i]/accum;
+        if( curFract >= frac )
+        {
+            return i;
+        }
+    }
+    // Can not come here
+    YW_ASSERT_INFO(false, "Something wrong here");
+    return -1;      // should nothappen
+
 }
 
-void GetOrigSubset(const vector<int> &origVec, const set<int> &subsetInd,
-                   set<int> &subsetOrig) {
-  subsetOrig.clear();
-  for (set<int>::iterator it = subsetInd.begin(); it != subsetInd.end(); ++it) {
-    YW_ASSERT_INFO(*it < (int)origVec.size(), "Size exceeds");
-    subsetOrig.insert(origVec[*it]);
-  }
+void GetOrigSubset( const vector<int> &origVec, const set<int> &subsetInd, set<int> &subsetOrig )
+{
+    subsetOrig.clear();
+    for( set<int> :: iterator it = subsetInd.begin(); it != subsetInd.end(); ++it )
+    {
+        YW_ASSERT_INFO( *it < (int)origVec.size(), "Size exceeds" );
+        subsetOrig.insert( origVec[*it] );
+    }
 }
 
-void MutateSequenceAtSites(SEQUENCE &mutSeq, vector<int> &mutSites) {
-  for (unsigned int p = 0; p < mutSites.size(); ++p) {
-    MutateSeqAtSite(mutSeq, mutSites[p]);
-  }
+void MutateSequenceAtSites( SEQUENCE & mutSeq, vector<int> & mutSites )
+{
+    for( unsigned int p=0; p<mutSites.size(); ++p )
+    {
+        MutateSeqAtSite( mutSeq, mutSites[p]  );
+    }
 }
 
-void DumpDoubleVec(const vector<double> &vecDoubles) {
-  cout << "Double vector contains: ";
-  for (unsigned int i = 0; i < vecDoubles.size(); ++i) {
-    cout << vecDoubles[i] << ", ";
-  }
-  cout << endl;
+void DumpDoubleVec(const vector<double> &vecDoubles)
+{
+    cout << "Double vector contains: ";
+    for( unsigned int i=0; i<vecDoubles.size(); ++i)
+    {
+        cout << vecDoubles[i] << ", ";
+    }
+    cout << endl;
 }
-void DumpDoubleVec(const vector<long double> &vecDoubles) {
-  cout << "Double vector contains: ";
-  for (unsigned int i = 0; i < vecDoubles.size(); ++i) {
-    cout << vecDoubles[i] << ", ";
-  }
-  cout << endl;
+void DumpDoubleVec(const vector<long double> &vecDoubles)
+{
+    cout << "Double vector contains: ";
+    for( unsigned int i=0; i<vecDoubles.size(); ++i)
+    {
+        cout << vecDoubles[i] << ", ";
+    }
+    cout << endl;
 }
-void DumpBoolVec(const vector<bool> &vecBools) {
-  cout << "Bool vector contains: ";
-  for (unsigned int i = 0; i < vecBools.size(); ++i) {
-    if (vecBools[i] == true) {
-      cout << "1,";
-    } else {
-      cout << "0, ";
+void DumpBoolVec( const vector<bool> &vecBools)
+{
+    cout << "Bool vector contains: ";
+    for( unsigned int i=0; i<vecBools.size(); ++i)
+    {
+        if( vecBools[i] == true )
+        {
+            cout << "1,";
+        }
+        else
+        {
+            cout << "0, ";
+        }
     }
-  }
-  cout << endl;
+    cout << endl;
 }
 
-int GetLargestIndiceInDoubleVec(const vector<double> &vecDoubles) {
-  YW_ASSERT_INFO(vecDoubles.size() > 0, "Can not have empty vec");
-  double maxv = vecDoubles[0];
-  int res = 0;
-  for (unsigned int i = 0; i < vecDoubles.size(); ++i) {
-    if (vecDoubles[i] > maxv) {
-      maxv = vecDoubles[i];
-      res = i;
+int GetLargestIndiceInDoubleVec(const vector<double> &vecDoubles)
+{
+	YW_ASSERT_INFO( vecDoubles.size() > 0, "Can not have empty vec" );
+    double maxv = vecDoubles[0];
+    int res = 0;
+    for( unsigned int i=0; i<vecDoubles.size(); ++i)
+    {
+        if( vecDoubles[i]  > maxv )
+        {
+            maxv = vecDoubles[i];
+            res = i;
+        }
     }
-  }
-  return res;
+    return res;
 }
 
-int GetLargestIndiceInDoubleVec(const vector<long double> &vecDoubles) {
-  long double maxv = 0.0;
-  int res = 0;
-  for (unsigned int i = 0; i < vecDoubles.size(); ++i) {
-    if (vecDoubles[i] > maxv) {
-      maxv = vecDoubles[i];
-      res = i;
+int GetLargestIndiceInDoubleVec(const vector<long double> &vecDoubles)
+{
+    long double maxv = 0.0;
+    int res = 0;
+    for( unsigned int i=0; i<vecDoubles.size(); ++i)
+    {
+        if( vecDoubles[i]  > maxv )
+        {
+            maxv = vecDoubles[i];
+            res = i;
+        }
     }
-  }
-  return res;
+    return res;
 }
 
-double FindMedian(const vector<double> &vecVals) {
-  // for now, if there is nothing in the list, return 0.0
-  if (vecVals.size() == 0) {
-    return 0.0;
-  }
+double FindMedian( const vector<double> &vecVals )
+{
+    // for now, if there is nothing in the list, return 0.0
+    if( vecVals.size() == 0 )
+    {
+        return 0.0;
+    }
 
-  YW_ASSERT_INFO(vecVals.size() > 0, "FindMedian: Can not be empty");
-
-  // Find median value for the vector
-  // first sort the list of course
-  vector<double> listToTry = vecVals;
-  SortDoubleVec(listToTry);
-  // now find the median one
-  // int totSize = (int)listToTry.size();
-  int pos = (int)(((int)listToTry.size() - 1) / 2);
-  return listToTry[pos];
-}
-
-long double FindMedian(const vector<long double> &vecVals) {
-  YW_ASSERT_INFO(vecVals.size() > 0, "FindMedian: Can not be empty");
-
-  // Find median value for the vector
-  // first sort the list of course
-  vector<long double> listToTry = vecVals;
-  SortDoubleVec(listToTry);
-  // now find the median one
-  // int totSize = (int)listToTry.size();
-  int pos = (int)(((int)listToTry.size() - 1) / 2);
-  return listToTry[pos];
-}
-
-double FindRankedItem(const vector<double> &vecVals, int rank) {
-  YW_ASSERT_INFO(rank < (int)vecVals.size(), "Rank: overflow");
-  vector<double> listToTry = vecVals;
-  SortDoubleVec(listToTry);
-  return listToTry[rank];
-}
-
-double FindMaxDouble(const vector<double> &vecVals) {
-  // fnd max value of the solution here, assuming all values are non-negative
-  vector<double> listToTry = vecVals;
-  SortDoubleVec(listToTry);
-  // cout << "vecVals = ";
-  // DumpDoubleVec( vecVals );
-
-  double res = listToTry[listToTry.size() - 1];
-  // cout << "res = " << res << endl;
-  return res;
-}
-
-double FindMaxDouble(const vector<long double> &vecVals) {
-  // fnd max value of the solution here, assuming all values are non-negative
-  vector<long double> listToTry = vecVals;
-  SortDoubleVec(listToTry);
-  // cout << "vecVals = ";
-  // DumpDoubleVec( vecVals );
-
-  double res = listToTry[listToTry.size() - 1];
-  // cout << "res = " << res << endl;
-  return res;
-}
-
-static int QSortCompareDouble(const void *arg1, const void *arg2) {
-  /* Compare all of both strings: */
-  // assume sorting in accending order
-  double n1 = *((double *)arg1);
-  double n2 = *((double *)arg2);
-  // cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
-  if (n1 > n2) {
-    return 1;
-  } else if (n1 < n2) {
-    return -1;
-  } else {
-    return 0;
-  }
+
+    YW_ASSERT_INFO( vecVals.size() > 0, "FindMedian: Can not be empty" );
+
+    // Find median value for the vector
+    // first sort the list of course
+    vector<double> listToTry = vecVals;
+    SortDoubleVec( listToTry );
+    // now find the median one
+    //int totSize = (int)listToTry.size();
+    int pos = (int)( ((int)listToTry.size()-1)/2);
+    return listToTry[pos];
 }
 
-void SortDoubleVec(vector<double> &vecVals, int start, int end) {
-  //#if 0
-  if (vecVals.size() <= 1) {
-    // do nothing
-    return;
-  }
-  // cout << "Before sort, double vec = ";
-  // DumpDoubleVec( vecVals );
-  if (end < 0) {
-    end = vecVals.size() - 1;
-  }
-  int sortLen = end - start + 1;
-  double *array = new double[sortLen];
-  for (int i = start; i <= end; ++i) {
-    array[i - start] = vecVals[i];
-  }
-  qsort((void *)array, sortLen, sizeof(double), QSortCompareDouble);
-  // Now write back
-  for (int i = start; i <= end; ++i) {
-    vecVals[i] = array[i - start];
-  }
+long double FindMedian( const vector<long double> &vecVals )
+{
+    YW_ASSERT_INFO( vecVals.size() > 0, "FindMedian: Can not be empty" );
 
-  delete[] array;
-  //#endif
-  // cout << "After sort, double vec = ";
-  // DumpDoubleVec( vecVals );
-}
-
-static int QSortCompareLongDouble(const void *arg1, const void *arg2) {
-  /* Compare all of both strings: */
-  // assume sorting in accending order
-  long double n1 = *((long double *)arg1);
-  long double n2 = *((long double *)arg2);
-  // cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
-  if (n1 > n2) {
-    return 1;
-  } else if (n1 < n2) {
-    return -1;
-  } else {
-    return 0;
-  }
+    // Find median value for the vector
+    // first sort the list of course
+    vector<long double> listToTry = vecVals;
+    SortDoubleVec( listToTry );
+    // now find the median one
+    //int totSize = (int)listToTry.size();
+    int pos = (int)( ((int)listToTry.size()-1)/2);
+    return listToTry[pos];
 }
 
-void SortDoubleVec(vector<long double> &vecVals, int start, int end) {
-  //#if 0
-  if (vecVals.size() <= 1) {
-    // do nothing
-    return;
-  }
-  // cout << "Before sort, double vec = ";
-  // DumpDoubleVec( vecVals );
-  if (end < 0) {
-    end = vecVals.size() - 1;
-  }
-  int sortLen = end - start + 1;
-  long double *array = new long double[sortLen];
-  for (int i = start; i <= end; ++i) {
-    array[i - start] = vecVals[i];
-  }
-  qsort((void *)array, sortLen, sizeof(long double), QSortCompareLongDouble);
-  // Now write back
-  for (int i = start; i <= end; ++i) {
-    vecVals[i] = array[i - start];
-  }
+double FindRankedItem( const vector<double> &vecVals, int rank )
+{
+	YW_ASSERT_INFO( rank < (int)vecVals.size(), "Rank: overflow" );
+    vector<double> listToTry = vecVals;
+    SortDoubleVec( listToTry );
+	return listToTry[rank];
+}
+
+double FindMaxDouble( const vector<double> &vecVals )
+{
+    // fnd max value of the solution here, assuming all values are non-negative
+    vector<double> listToTry = vecVals;
+    SortDoubleVec( listToTry );
+//cout << "vecVals = ";
+//DumpDoubleVec( vecVals );
 
-  delete[] array;
-  //#endif
-  // cout << "After sort, double vec = ";
-  // DumpDoubleVec( vecVals );
+    double res = listToTry[ listToTry.size()-1 ];
+//cout << "res = " << res << endl;
+    return res;
 }
 
-void FindUniformColumns(const vector<SEQUENCE> &listSeqs, set<int> &uniSites) {
-  uniSites.clear();
-  if (listSeqs.size() == 0) {
-    return;
-  }
-  int numSites = (int)listSeqs[0].size();
-  for (int i = 0; i < numSites; ++i) {
-    bool f0 = false, f1 = false;
-    for (int r = 0; r < (int)listSeqs.size(); ++r) {
-      if (listSeqs[r][i] == 0) {
-        f0 = true;
-      } else if (listSeqs[r][i] == 1) {
-        f1 = true;
-      }
-      if (f0 == true && f1 == true) {
-        // not uniform,
-        break;
-      }
-    }
-    if (f0 == false || f1 == false) {
-      // yes, this site is uniform
-      uniSites.insert(i);
+double FindMaxDouble( const vector<long double> &vecVals )
+{
+    // fnd max value of the solution here, assuming all values are non-negative
+    vector<long double> listToTry = vecVals;
+    SortDoubleVec( listToTry );
+//cout << "vecVals = ";
+//DumpDoubleVec( vecVals );
+
+    double res = listToTry[ listToTry.size()-1 ];
+//cout << "res = " << res << endl;
+    return res;
+}
+
+
+static int QSortCompareDouble( const void *arg1, const void *arg2 )
+{
+   /* Compare all of both strings: */
+    // assume sorting in accending order
+    double n1 = *((double *) arg1);
+    double n2 = *((double *) arg2);
+//cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
+    if( n1 > n2)
+    {
+        return 1;
+    }
+    else if( n1 < n2)
+    {
+        return -1;
+    }
+    else
+    {
+        return 0;
     }
-  }
 }
 
-void BreakSeqAtBkpt(const SEQUENCE &seq, int bkpt, SEQUENCE &seqLeft,
-                    SEQUENCE &seqRight) {
-  seqLeft.clear();
-  seqRight.clear();
-  for (int i = 0; i < (int)seq.size(); ++i) {
-    if (i <= bkpt) {
-      // then the right seq get MV
-      seqLeft.push_back(seq[i]);
-      seqRight.push_back(MISSING_VALUE_BIT);
-    } else {
-      seqLeft.push_back(MISSING_VALUE_BIT);
-      seqRight.push_back(seq[i]);
+
+void SortDoubleVec( vector<double> &vecVals, int start, int end )
+{
+//#if 0
+    if( vecVals.size() <= 1)
+    {
+        // do nothing
+        return;
     }
-  }
+//cout << "Before sort, double vec = ";
+//DumpDoubleVec( vecVals );
+	if (end < 0 )
+	{
+		end = vecVals.size() - 1;
+	}
+    int sortLen = end - start +1;
+    double *array = new double[sortLen];
+    for(int i=start; i<= end; ++i)
+    {
+        array[i-start] = vecVals[i];
+    }
+    qsort( (void *)array, sortLen, sizeof( double ), QSortCompareDouble );
+    // Now write back
+    for(int i=start; i<=end; ++i)
+    {
+        vecVals[i] = array[i-start];
+    }
+
+    delete [] array;
+//#endif
+//cout << "After sort, double vec = ";
+//DumpDoubleVec( vecVals );
 }
 
-bool AreTwoSeqsBroken(const SEQUENCE &seqLeft, const SEQUENCE &seqRight) {
-  // test whether the two sequences are broken from a single sequence
-  // to avoid duplicate events mainly
-  bool foundBkpt = false;
-  if (seqLeft.size() != seqRight.size()) {
-    return false;
-  }
 
-  for (int i = 0; i < (int)seqLeft.size(); ++i) {
-    if (IsMissingValueBit(seqLeft[i]) == false &&
-        IsMissingValueBit(seqRight[i]) == false) {
-      return false; // no, not a broken seqs pair
+static int QSortCompareLongDouble( const void *arg1, const void *arg2 )
+{
+   /* Compare all of both strings: */
+    // assume sorting in accending order
+    long double n1 = *((long double *) arg1);
+    long double n2 = *((long double *) arg2);
+//cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
+    if( n1 > n2)
+    {
+        return 1;
+    }
+    else if( n1 < n2)
+    {
+        return -1;
     }
+    else
+    {
+        return 0;
+    }
+}
 
-    if (IsMissingValueBit(seqRight[i]) == false) {
-      if (foundBkpt == false) {
-        foundBkpt = true;
-      }
+
+void SortDoubleVec( vector<long double> &vecVals, int start, int end )
+{
+//#if 0
+    if( vecVals.size() <= 1)
+    {
+        // do nothing
+        return;
+    }
+//cout << "Before sort, double vec = ";
+//DumpDoubleVec( vecVals );
+	if (end < 0 )
+	{
+		end = vecVals.size() - 1;
+	}
+    int sortLen = end - start +1;
+    long double *array = new long double[sortLen];
+    for(int i=start; i<= end; ++i)
+    {
+        array[i-start] = vecVals[i];
     }
-    if (foundBkpt == true && IsMissingValueBit(seqLeft[i]) == false) {
-      return false;
+    qsort( (void *)array, sortLen, sizeof( long double ), QSortCompareLongDouble );
+    // Now write back
+    for(int i=start; i<=end; ++i)
+    {
+        vecVals[i] = array[i-start];
+    }
+
+    delete [] array;
+//#endif
+//cout << "After sort, double vec = ";
+//DumpDoubleVec( vecVals );
+}
+
+void FindUniformColumns( const vector<SEQUENCE> &listSeqs, set<int> &uniSites)
+{
+    uniSites.clear();
+    if( listSeqs.size() == 0 )
+    {
+        return;
+    }
+    int numSites = (int) listSeqs[0].size();
+    for( int i=0; i<numSites; ++i )
+    {
+        bool f0=false, f1=false;
+        for( int r = 0; r<(int)listSeqs.size(); ++r )
+        {
+            if( listSeqs[r][i] == 0 )
+            {
+                f0 = true;
+            }
+            else if(listSeqs[r][i] == 1)
+            {
+                f1 = true;
+            }
+            if( f0 == true && f1 == true )
+            {
+                // not uniform,
+                break;
+            }
+        }
+        if( f0 == false || f1 == false )
+        {
+            // yes, this site is uniform
+            uniSites.insert( i );
+        }
     }
-  }
-  return true;
+}
+
+void BreakSeqAtBkpt( const SEQUENCE &seq, int bkpt, SEQUENCE &seqLeft, SEQUENCE &seqRight  )
+{
+    seqLeft.clear();
+    seqRight.clear();
+    for( int i=0; i<(int)seq.size(); ++i )
+    {
+        if( i <= bkpt )
+        {
+            // then the right seq get MV
+            seqLeft.push_back( seq[i] );
+            seqRight.push_back( MISSING_VALUE_BIT );
+        }
+        else
+        {
+            seqLeft.push_back( MISSING_VALUE_BIT );
+            seqRight.push_back( seq[i] );
+        }
+    }
+}
+
+bool AreTwoSeqsBroken( const SEQUENCE &seqLeft, const SEQUENCE &seqRight )
+{
+    // test whether the two sequences are broken from a single sequence
+    // to avoid duplicate events mainly
+    bool foundBkpt = false;
+    if( seqLeft.size() != seqRight.size() )
+    {
+        return false;
+    }
+
+    for( int i=0; i<(int)seqLeft.size(); ++i)
+    {
+        if(IsMissingValueBit( seqLeft[i] ) == false && IsMissingValueBit( seqRight[i] ) == false)
+        {
+            return false;   // no, not a broken seqs pair
+        }
+
+        if( IsMissingValueBit( seqRight[i] ) == false )
+        {
+            if( foundBkpt == false )
+            {
+                foundBkpt = true;
+            }
+        }
+        if( foundBkpt == true && IsMissingValueBit( seqLeft[i] ) == false )
+        {
+            return false;
+        }
+    }
+    return true;
 }
 
 // new stuff from treeHMM
 
-bool GetFirstMutliChoice(int numStage, int numStageElem,
-                         vector<int> &initChoice) {
-  if (numStage <= 0 || numStageElem <= 0) {
-    return false;
-  }
-  initChoice.clear();
-  // Start by picking first one each time
-  for (int i = 0; i < numStage; ++i) {
-    initChoice.push_back(0);
-  }
-  return true;
+bool GetFirstMutliChoice( int numStage, int numStageElem, vector<int> &initChoice )
+{
+    if( numStage <= 0 || numStageElem <= 0 )
+    {
+        return false;
+    }
+    initChoice.clear();
+    // Start by picking first one each time
+    for( int i=0; i<numStage; ++i )
+    {
+        initChoice.push_back( 0 );
+    }
+    return true;
 }
 
-bool GetNextMutliChoice(int numStage, int numStageElem,
-                        vector<int> &indChoice) {
-  // Now we move to next choice
-  // bool res = false;
-  // Find the last item not = numStageElem-1
-  int itemToChange = -1;
-  for (int i = ((int)indChoice.size()) - 1; i >= 0; --i) {
-    if (indChoice[i] < numStageElem - 1) {
-      itemToChange = i;
-      break;
+bool GetNextMutliChoice( int numStage, int numStageElem, vector<int> &indChoice )
+{
+    // Now we move to next choice
+    //bool res = false;
+    // Find the last item not = numStageElem-1
+    int itemToChange  = -1;
+    for(  int i= ((int)indChoice.size())-1; i>=0;    --i )
+    {
+        if( indChoice[i] <  numStageElem-1 )
+        {
+            itemToChange = i;
+            break;
+        }
     }
-  }
-  if (itemToChange < 0) {
-    // No solution
-    return false;
-  }
-  // Now we clear out everything beyond it
-  for (int i = itemToChange + 1; i < (int)indChoice.size(); ++i) {
-    indChoice[i] = 0;
-  }
-  indChoice[itemToChange]++;
-  return true;
+    if( itemToChange < 0 )
+    {
+        // No solution
+        return false;
+    }
+    // Now we clear out everything beyond it
+    for( int i= itemToChange+1; i<(int) indChoice.size(); ++i)
+    {
+        indChoice[i] = 0;
+    }
+    indChoice[itemToChange] ++;
+    return true;
 }
 
-// void DumpVecSequences( const vector<SEQUENCE> &vecSeqs )
+//void DumpVecSequences( const vector<SEQUENCE> &vecSeqs )
 //{
 //    cout << "Vector of sequneces = \n";
 //    for( unsigned int i=0; i<vecSeqs.size(); ++i )
@@ -502,492 +626,574 @@ bool GetNextMutliChoice(int numStage, int numStageElem,
 //    }
 //}
 
-void GetVecSequencesIV(const vector<SEQUENCE> &vecSeqs, int left, int right,
-                       vector<SEQUENCE> &vecSeqsIV) {
-  vecSeqsIV.clear();
-  for (unsigned int i = 0; i < vecSeqs.size(); ++i) {
-    SEQUENCE ivRow;
-    GetSeqInterval(vecSeqs[i], ivRow, left, right);
-    vecSeqsIV.push_back(ivRow);
-  }
+void GetVecSequencesIV( const vector<SEQUENCE> &vecSeqs, int left, int right, vector<SEQUENCE> &vecSeqsIV )
+{
+    vecSeqsIV.clear();
+    for(unsigned int i=0;i<vecSeqs.size(); ++i)
+    {
+        SEQUENCE ivRow;
+        GetSeqInterval(vecSeqs[i], ivRow, left, right);
+        vecSeqsIV.push_back( ivRow );
+    }
 }
 
-int GetNumZerosInSeq(const SEQUENCE &seq) {
-  int res = 0;
-  for (unsigned int i = 0; i < seq.size(); ++i) {
-    if (seq[i] == 0) {
-      res++;
+int GetNumZerosInSeq(const SEQUENCE &seq)
+{
+    int res = 0;
+    for(unsigned int i=0; i<seq.size(); ++i)
+    {
+        if( seq[i] == 0 )
+        {
+            res++;
+        }
     }
-  }
-  return res;
+    return res;
 }
 
-void GetSeqSplit(const SEQUENCE &seq, set<int> &zeroBits, set<int> &oneBits) {
-  zeroBits.clear();
-  oneBits.clear();
-  for (unsigned int i = 0; i < seq.size(); ++i) {
-    if (seq[i] == 0) {
-      zeroBits.insert(i);
-    } else if (seq[i] == 1) // need to enforce this due to potential missing
-                            // value, 7/4/08
+void GetSeqSplit(const SEQUENCE &seq, set<int> &zeroBits, set<int> &oneBits)
+{
+    zeroBits.clear();
+    oneBits.clear();
+    for(unsigned int i=0; i<seq.size(); ++i)
     {
-      oneBits.insert(i);
+        if( seq[i] == 0 )
+        {
+            zeroBits.insert( i );
+        }
+        else if( seq[i] == 1 )		// need to enforce this due to potential missing value, 7/4/08
+        {
+            oneBits.insert( i );
+        }
     }
-  }
 }
 
-static int QSortCompareIntPair(const void *arg1, const void *arg2) {
-  /* Compare all of both strings: */
-  // assume sorting in accending order, and use the first value in the int pair
-  // to sort
-  int n1 = ((pair<int, int> *)arg1)->first;
-  int n2 = ((pair<int, int> *)arg2)->first;
-  // cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
-  if (n1 > n2) {
-    return 1;
-  } else if (n1 < n2) {
-    return -1;
-  } else {
-    return 0;
-  }
+static int QSortCompareIntPair( const void *arg1, const void *arg2 )
+{
+   /* Compare all of both strings: */
+    // assume sorting in accending order, and use the first value in the int pair to sort
+    int n1 = ((pair<int,int> *) arg1)->first;
+    int n2 = ((pair<int,int> *) arg2)->first;
+//cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
+    if( n1 > n2)
+    {
+        return 1;
+    }
+    else if( n1 < n2)
+    {
+        return -1;
+    }
+    else
+    {
+        return 0;
+    }
 }
 
-void SortVecIntPairs(vector<pair<int, int> > &listPairs) {
-  pair<int, int> *parray = new pair<int, int>[listPairs.size()];
-  for (int i = 0; i < (int)listPairs.size(); ++i) {
-    parray[i] = listPairs[i];
-  }
-  qsort((void *)parray, listPairs.size(), sizeof(pair<int, int>),
-        QSortCompareIntPair);
-  // Now write back
-  for (int i = 0; i < (int)listPairs.size(); ++i) {
-    listPairs[i] = parray[i];
-  }
+void SortVecIntPairs(vector<pair<int,int> > &listPairs )
+{
+    pair<int,int> *parray = new  pair<int,int>[ listPairs.size()  ];
+    for(int i=0; i< (int)listPairs.size() ; ++i)
+    {
+        parray[i] = listPairs[i];
+    }
+    qsort( (void *)parray, listPairs.size(), sizeof( pair<int,int>  ), QSortCompareIntPair );
+    // Now write back
+    for(int i=0; i< (int)listPairs.size(); ++i)
+    {
+        listPairs[i] = parray[i];
+    }
 
-  delete[] parray;
+    delete [] parray;
 }
 
-////////////////////////////////////////////////////////////////////////////////
-int GetSubstringLeftPos(const INTERVAL_SUBSTRING &substr) {
-  return substr.first.first;
+
+
+///////////////////////////////////////////////////////////////////////////////////////////
+int GetSubstringLeftPos( const INTERVAL_SUBSTRING &substr )
+{
+    return substr.first.first;
 }
-int GetSubstringRightPos(const INTERVAL_SUBSTRING &substr) {
-  return substr.first.second;
+int GetSubstringRightPos( const INTERVAL_SUBSTRING &substr )
+{
+    return substr.first.second;
 }
-void GetIVSubstringData(const INTERVAL_SUBSTRING &substr, SEQUENCE &seq) {
-  seq = substr.second;
+void GetIVSubstringData(const INTERVAL_SUBSTRING &substr, SEQUENCE &seq )
+{
+    seq = substr.second;
 }
-INTERVAL GetSubstringInterval(const INTERVAL_SUBSTRING &substr) {
-  return substr.first;
+INTERVAL GetSubstringInterval( const INTERVAL_SUBSTRING &substr)
+{
+    return substr.first;
 }
-bool GetSubstringSegment(const INTERVAL_SUBSTRING &substr,
-                         const INTERVAL &ivToRead, SEQUENCE &segment) {
-  YW_ASSERT_INFO(IsIntervalContained(ivToRead, substr.first) == true,
-                 "Two intervals do not have contained");
+bool GetSubstringSegment(const INTERVAL_SUBSTRING &substr, const INTERVAL &ivToRead, SEQUENCE &segment)
+{
+    YW_ASSERT_INFO( IsIntervalContained(ivToRead, substr.first) == true, "Two intervals do not have contained" );
 
-  // remember we have to offset a little
-  int startPos = GetSubstringLeftPos(substr);
-  GetSeqInterval(substr.second, segment, ivToRead.first - startPos,
-                 ivToRead.second - startPos);
-  return true;
+    // remember we have to offset a little
+    int startPos = GetSubstringLeftPos( substr );
+    GetSeqInterval(substr.second, segment, ivToRead.first-startPos, ivToRead.second-startPos);
+    return true;
 }
 
-int GetSubstringValAt(const INTERVAL_SUBSTRING &substr, int pos) {
-  YW_ASSERT_INFO(pos >= GetSubstringLeftPos(substr) &&
-                     pos <= GetSubstringRightPos(substr),
-                 "Range error.");
+int GetSubstringValAt( const INTERVAL_SUBSTRING &substr, int pos )
+{
+    YW_ASSERT_INFO( pos >= GetSubstringLeftPos(substr) && pos <= GetSubstringRightPos(substr), "Range error." );
 
-  int convPos = pos - GetSubstringLeftPos(substr);
-  return substr.second[convPos];
+    int convPos = pos - GetSubstringLeftPos( substr );
+    return substr.second[ convPos ];
 }
 
-bool IsSegmentContained(const INTERVAL_SUBSTRING &seqContained,
-                        const INTERVAL_SUBSTRING &seqContainer) {
-  // First the range has to match
-  if (GetSubstringLeftPos(seqContained) < GetSubstringLeftPos(seqContainer) ||
-      GetSubstringRightPos(seqContained) > GetSubstringRightPos(seqContainer)) {
-    return false;
-  }
-  // Then the corresponding position must match too
-  for (int p = GetSubstringLeftPos(seqContained);
-       p <= GetSubstringRightPos(seqContained); p++) {
-    if (GetSubstringValAt(seqContained, p) !=
-        GetSubstringValAt(seqContainer, p)) {
-      return false;
+bool IsSegmentContained( const INTERVAL_SUBSTRING &seqContained, const INTERVAL_SUBSTRING& seqContainer   )
+{
+    // First the range has to match
+    if( GetSubstringLeftPos(seqContained)  < GetSubstringLeftPos(seqContainer)  ||
+        GetSubstringRightPos(seqContained)  > GetSubstringRightPos(seqContainer)  )
+    {
+        return false;
     }
-  }
-  return true;
+    // Then the corresponding position must match too
+    for( int p = GetSubstringLeftPos(seqContained); p<= GetSubstringRightPos(seqContained); p++ )
+    {
+        if( GetSubstringValAt(seqContained, p)  !=  GetSubstringValAt( seqContainer, p) )
+        {
+            return false;
+        }
+    }
+    return true;
 }
 
-bool AreSegmentsConsistent(const INTERVAL_SUBSTRING &seq1,
-                           const INTERVAL_SUBSTRING &seq2) {
-  // If disjoint, yes, it is consistent
-  INTERVAL ivInt;
-  bool fInt = GetIntervalOverlap(GetSubstringInterval(seq1),
-                                 GetSubstringInterval(seq2), ivInt);
-  if (fInt == false) {
-    return true;
-  }
-  // cout << "ivInt.first = " << ivInt.first << ", ivInt.second = " <<
-  // ivInt.second << endl;
-  // make sure the two things matches
-  SEQUENCE seqp1;
-  GetSubstringSegment(seq1, ivInt, seqp1);
-  // cout << "seqp1 = ";
-  // DumpSequence( seqp1 );
-  SEQUENCE seqp2;
-  GetSubstringSegment(seq2, ivInt, seqp2);
-  // cout << "seqp2 = ";
-  // DumpSequence( seqp2 );
-
-  if (seqp1 == seqp2) {
-    return true;
-  } else {
-    return false;
-  }
+bool AreSegmentsConsistent( const INTERVAL_SUBSTRING &seq1, const INTERVAL_SUBSTRING& seq2 )
+{
+    // If disjoint, yes, it is consistent
+    INTERVAL ivInt;
+    bool fInt = GetIntervalOverlap( GetSubstringInterval(seq1), GetSubstringInterval(seq2), ivInt);
+    if( fInt == false  )
+    {
+        return true;
+    }
+//cout << "ivInt.first = " << ivInt.first << ", ivInt.second = " << ivInt.second << endl;
+    // make sure the two things matches
+    SEQUENCE seqp1;
+    GetSubstringSegment( seq1, ivInt, seqp1);
+//cout << "seqp1 = ";
+//DumpSequence( seqp1 );
+    SEQUENCE seqp2;
+    GetSubstringSegment( seq2, ivInt, seqp2);
+//cout << "seqp2 = ";
+//DumpSequence( seqp2 );
+
+    if( seqp1 == seqp2 )
+    {
+        return true;
+    }
+    else
+    {
+        return false;
+    }
+
 }
 
-int GetSegmentsIntersection(const INTERVAL_SUBSTRING &seq1,
-                            const INTERVAL_SUBSTRING &seq2, INTERVAL &iv) {
-  // we simply get how larget the intersection from the interval ONLY
+int GetSegmentsIntersection( const INTERVAL_SUBSTRING &seq1, const INTERVAL_SUBSTRING& seq2, INTERVAL &iv )
+{
+    // we simply get how larget the intersection from the interval ONLY
 
-  bool fInt = GetIntervalOverlap(GetSubstringInterval(seq1),
-                                 GetSubstringInterval(seq2), iv);
-  if (fInt == false) {
-    return 0;
-  }
-  return iv.second - iv.first + 1;
-}
-
-bool AreSegmentsNextto(const INTERVAL_SUBSTRING &seq1,
-                       const INTERVAL_SUBSTRING &seq2) {
-  // cout << "seq1.left = " <<  GetSubstringLeftPos(seq1) << ", right = " <<
-  // GetSubstringRightPos(seq1) << endl; cout << "seq2.left = " <<
-  // GetSubstringLeftPos(seq2) << ", right = " <<  GetSubstringRightPos(seq2) <<
-  // endl;
-  // Two segments are next to each other if the can form a single bigger
-  // ungapped piece
-  if (GetSubstringLeftPos(seq1) == GetSubstringRightPos(seq2) + 1 ||
-      GetSubstringLeftPos(seq2) == GetSubstringRightPos(seq1) + 1) {
-    // cout << "Yes, neighbours.\n";
-    return true;
-  } else {
-    // cout << "No, not neighbours.\n";
-    return false;
-  }
+    bool fInt = GetIntervalOverlap( GetSubstringInterval(seq1), GetSubstringInterval(seq2), iv);
+    if( fInt == false  )
+    {
+        return 0;
+    }
+    return iv.second - iv.first+1;
 }
 
-void DumpSubstring(const INTERVAL_SUBSTRING &substr) {
-  cout << "[" << GetSubstringLeftPos(substr) << ",";
-  cout << GetSubstringRightPos(substr) << "], ";
-  DumpSequence(substr.second);
+bool AreSegmentsNextto( const INTERVAL_SUBSTRING &seq1, const INTERVAL_SUBSTRING& seq2 )
+{
+//cout << "seq1.left = " <<  GetSubstringLeftPos(seq1) << ", right = " <<  GetSubstringRightPos(seq1) << endl;
+//cout << "seq2.left = " <<  GetSubstringLeftPos(seq2) << ", right = " <<  GetSubstringRightPos(seq2) << endl;
+    // Two segments are next to each other if the can form a single bigger ungapped piece
+    if( GetSubstringLeftPos(seq1) == GetSubstringRightPos(seq2) + 1
+        || GetSubstringLeftPos(seq2) == GetSubstringRightPos(seq1) + 1  )
+    {
+//cout << "Yes, neighbours.\n";
+        return true;
+    }
+    else
+    {
+//cout << "No, not neighbours.\n";
+        return false;
+    }
+}
+
+void DumpSubstring( const INTERVAL_SUBSTRING &substr )
+{
+    cout << "[" << GetSubstringLeftPos(substr) << ",";
+    cout << GetSubstringRightPos(substr) << "], ";
+    DumpSequence( substr.second );
 }
 
 // ***************************************************************************
 // Numerical utilities
 // ***************************************************************************
-double GetLogSumOfLogs(const vector<double> &listLogs) {
-  if (listLogs.size() == 0) {
-    // nothing to process
-    return 0.0;
-  }
-  // given a list of log terms, compute the sum of prob (need to take exp)
-  // and express the sum in the log again
-  // first get the largest term and use it as a base
-  int posmax = GetLargestIndiceInDoubleVec(listLogs);
-  double valmax = listLogs[posmax];
-  double asum = 0.0;
-  for (int i = 0; i < (int)listLogs.size(); ++i) {
-    asum += exp(listLogs[i] - valmax);
-  }
-  double res = valmax + log(asum);
-  // cout << "res = " << res << ", valmax = " << valmax << ", in list: ";
-  // DumpDoubleVec(listLogs);
-  // cout << "Direct evaluation = " << GetLogSumOfLogsDirect(listLogs) << endl;
-  return res;
-}
-
-double GetLogSumOfLogsDirect(const vector<double> &listLogs) {
-  // simply just direct sum over
-  double asum = 0.0;
-  for (int i = 0; i < (int)listLogs.size(); ++i) {
-    asum += exp(listLogs[i]);
-  }
-  return log(asum);
+double GetLogSumOfLogs(const vector<double> &listLogs)
+{
+	if( listLogs.size() == 0)
+	{
+		// nothing to process
+		return 0.0;
+	}
+	// given a list of log terms, compute the sum of prob (need to take exp)
+	// and express the sum in the log again
+	// first get the largest term and use it as a base
+	int posmax = GetLargestIndiceInDoubleVec(listLogs);
+	double valmax = listLogs[posmax];
+	double asum = 0.0;
+	for(int i=0; i<(int)listLogs.size(); ++i)
+	{
+		asum += exp(listLogs[i] - valmax);
+	}
+	double res = valmax+log(asum);
+//cout << "res = " << res << ", valmax = " << valmax << ", in list: ";
+//DumpDoubleVec(listLogs);
+//cout << "Direct evaluation = " << GetLogSumOfLogsDirect(listLogs) << endl;
+	return res;
+}
+
+double GetLogSumOfLogsDirect(const vector<double> &listLogs)
+{
+	// simply just direct sum over
+	double asum = 0.0;
+	for(int i=0; i<(int)listLogs.size(); ++i)
+	{
+		asum += exp(listLogs[i]);
+	}
+	return log(asum);
 }
 
-double GetLogSumOfTwo(double logv1, double logv2) {
-  vector<double> vecVals;
-  vecVals.push_back(logv1);
-  vecVals.push_back(logv2);
-  return GetLogSumOfLogs(vecVals);
+double GetLogSumOfTwo(double logv1, double logv2)
+{
+	vector<double> vecVals;
+	vecVals.push_back( logv1);
+	vecVals.push_back( logv2);
+	return GetLogSumOfLogs(vecVals);
 }
 
-void SumofLogVecs(vector<double> &listLogsAdded,
-                  vector<double> &listLogsAdding) {
-  YW_ASSERT_INFO(listLogsAdded.size() == listLogsAdding.size(),
-                 "Must have the same length");
-  for (int i = 0; i < (int)listLogsAdded.size(); ++i) {
-    listLogsAdded[i] = GetLogSumOfTwo(listLogsAdded[i], listLogsAdding[i]);
-  }
+void SumofLogVecs( vector<double> &listLogsAdded, vector<double> &listLogsAdding )
+{
+    YW_ASSERT_INFO(listLogsAdded.size() == listLogsAdding.size(), "Must have the same length" );
+    for( int i=0; i<(int)listLogsAdded.size(); ++i )
+    {
+        listLogsAdded[i] = GetLogSumOfTwo( listLogsAdded[i], listLogsAdding[i] );
+    }
 }
 
-////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////////////
 // More useful functions
 
 // This is a very useful function, so expose it
-int FindMatchedSeqForFounders(const vector<SEQUENCE> &founder,
-                              const SEQUENCE &seq, set<int> &endRows,
-                              bool fPrefix) {
-  // Return the number of crossovers
-  // This function computes the minimum recombination weight for the given
-  // hapRow when restricted to interval [left, right] in mat
-  int res = 0;
-
-  set<int> lastTrackRows; // set of rows that matching the hapRow
-
-  // Ohterwise, we can start from all possible rows
-  for (unsigned int i = 0; i < founder.size(); ++i) {
-    lastTrackRows.insert(i);
-  }
-
-  int curpos = 0;
-  int end = seq.size();
-  if (fPrefix == false) {
-    curpos = seq.size() - 1;
-    end = -1;
-  }
-
-  while (curpos != end) {
-    // Each time, we intersect the set with the sets matching the current bit
-    set<int> trackRows;
-    for (unsigned int i = 0; i < founder.size(); ++i) {
-      if (IsTwoStatesCompatible(founder[i][curpos], seq[curpos]) == true) {
-        // Yes, this row matches
-        trackRows.insert(i);
-      }
+int FindMatchedSeqForFounders( const vector<SEQUENCE> &founder, const SEQUENCE &seq,
+                                  set<int> &endRows, bool fPrefix)
+{
+    // Return the number of crossovers
+    // This function computes the minimum recombination weight for the given hapRow
+    // when restricted to interval [left, right] in mat
+    int res = 0;
+
+    set<int> lastTrackRows;  // set of rows that matching the hapRow
+
+    // Otherwise, we can start from all possible rows
+    for( unsigned int i=0; i<founder.size(); ++i )
+    {
+        lastTrackRows.insert( i );
     }
 
-    // Now we test if there is intersection, if non-empty, we contiinue
-    set<int> sint;
-    JoinSets(trackRows, lastTrackRows, sint);
-    if (sint.size() == 0) {
-      break;
-    } else {
-      // In this case, we still continue
-      lastTrackRows = sint;
+    int curpos  = 0;
+    int end = seq.size();
+    if( fPrefix == false )
+    {
+        curpos = seq.size()-1;
+        end = -1;
     }
 
-    if (fPrefix == true) {
-      curpos++;
-    } else {
-      curpos--;
+
+    while( curpos != end )
+    {
+        // Each time, we intersect the set with the sets matching the current bit
+        set<int> trackRows;
+        for(unsigned int i=0; i<founder.size(); ++i)
+        {
+            if( IsTwoStatesCompatible( founder[i][curpos] , seq[curpos]) == true )
+            {
+                // Yes, this row matches
+                trackRows.insert( i );
+            }
+        }
+
+        // Now we test whether the intersection is non-empty; if so, we continue
+        set<int> sint;
+        JoinSets(trackRows, lastTrackRows, sint);
+        if(sint.size() == 0)
+        {
+            break;
+        }
+        else
+        {
+            // In this case, we still continue
+            lastTrackRows = sint;
+        }
+
+        if( fPrefix == true)
+        {
+            curpos++;
+        }
+        else
+        {
+            curpos--;
+        }
     }
-  }
 
-  endRows = lastTrackRows;
+    endRows = lastTrackRows;
 
-  // what is the length of the prefix/suffix
-  if (fPrefix) {
-    res = curpos;
-  } else {
-    res = seq.size() - 1 - curpos;
-  }
+    // what is the length of the prefix/suffix
+    if( fPrefix )
+    {
+        res = curpos;
+    }
+    else
+    {
+        res = seq.size() - 1 - curpos;
+    }
 
-  return res;
+    return res;
 }
 
-int FindNoninformativeRow(const vector<SEQUENCE> &listSeqs, int col) {
-  int numZeros = 0, numOnes = 0, numMissing = 0;
-  // now we compare these two cols: c1, c2
-  // if they match, we put c2 into set
-  int res0 = -1, res1 = -1;
-  for (unsigned int r = 0; r < listSeqs.size(); ++r) {
-    if (listSeqs[r][col] == 0) {
-      numZeros++;
-      res0 = r;
-    } else if (listSeqs[r][col] == 1) {
-      numOnes++;
-      res1 = r;
-    } else if (IsMissingValueBit(listSeqs[r][col]) == true) {
-      numMissing++;
+int FindNoninformativeRow( const vector<SEQUENCE> &listSeqs, int col)
+{
+	int numZeros = 0, numOnes = 0, numMissing = 0;
+	// now we compare these two cols: c1, c2
+	// if they match, we put c2 into set
+    int res0 = -1, res1 = -1;
+	for(unsigned int r = 0; r< listSeqs.size(); ++r)
+	{
+		if(listSeqs[r][col] == 0)
+		{
+			numZeros ++;
+            res0 = r;
+		}
+		else if(listSeqs[r][col] == 1)
+		{
+			numOnes ++;
+            res1 = r;
+		}
+        else if(  IsMissingValueBit( listSeqs[r][col] ) == true  )
+        {
+            numMissing++;
+        }
+        if(  numZeros > 1 && numOnes > 1 )
+        {
+            return -1;  // no such row
+        }
+	}
+
+	// Check to see if this is non-informative
+	if( numZeros ==1  && numOnes >= 1  )
+	{
+		// we find a duplicate
+//			cout << "Site  " << c1+1 << "is  non-informative" << endl;
+        return res0;
+	}
+    else if( numOnes == 1  && numZeros >= 1)
+    {
+        return res1;
     }
-    if (numZeros > 1 && numOnes > 1) {
-      return -1; // no such row
+    else
+    {
+        return -1;
     }
-  }
-
-  // Check to see if this is non-informative
-  if (numZeros == 1 && numOnes >= 1) {
-    // we find a duplicate
-    //			cout << "Site  " << c1+1 << "is  non-informative" <<
-    // endl;
-    return res0;
-  } else if (numOnes == 1 && numZeros >= 1) {
-    return res1;
-  } else {
-    return -1;
-  }
 }
 
-void ConvVecToArray(const vector<int> &vec, int *arr) {
-  // IMPORTANT: ASSUME ARR HAS BEEN ALLOCATED TO PROPER SIZE!!
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    arr[i] = vec[i];
-  }
+void ConvVecToArray( const vector<int> &vec, int *arr )
+{
+    // IMPORTANT: ASSUME ARR HAS BEEN ALLOCATED TO PROPER SIZE!!
+    for(int i=0; i<(int)vec.size(); ++i)
+    {
+        arr[i] = vec[i];
+    }
 }
-void ConvVecToArray(const vector<double> &vec, double *arr) {
-  // IMPORTANT: ASSUME ARR HAS BEEN ALLOCATED TO PROPER SIZE!!
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    arr[i] = vec[i];
-  }
+void ConvVecToArray( const vector<double> &vec, double *arr )
+{
+    // IMPORTANT: ASSUME ARR HAS BEEN ALLOCATED TO PROPER SIZE!!
+    for(int i=0; i<(int)vec.size(); ++i)
+    {
+        arr[i] = vec[i];
+    }
 }
-void DumpIntArray(int len, int *arr) {
-  for (int i = 0; i < len; ++i) {
-    cout << arr[i];
-    if (i < len - 1) {
-      cout << ", ";
+void DumpIntArray(int len, int *arr)
+{
+    for(int i=0; i<len; ++i)
+    {
+        cout <<  arr[i] ;
+        if( i < len -1)
+        {
+            cout << ", ";
+        }
     }
-  }
-  cout << endl;
+    cout << endl;
 }
-void FlipBinVector(vector<int> &vec) {
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    if (vec[i] == 0) {
-      vec[i] = 1;
-    } else {
-      vec[i] = 0;
+void FlipBinVector(vector<int> &vec)
+{
+    for(int i=0; i<(int)vec.size(); ++i)
+    {
+        if( vec[i] == 0 )
+        {
+            vec[i] = 1;
+        }
+        else
+        {
+            vec[i] = 0;
+        }
     }
-  }
 }
 
-void RecoverOrigIndicesAfterDeletion(const vector<int> &removedItems,
-                                     const vector<int> &itemsNew,
-                                     vector<int> &itemsOrigIndices) {
-  // this function is for reconstructing the orignal indices of items
-  // after some items were deleted from array, and the passed-in indices are for
-  // the NEW positions. We are interested to know the origianl positions
-  itemsOrigIndices.clear();
+void RecoverOrigIndicesAfterDeletion( const vector<int> &removedItems, const vector<int> &itemsNew,
+									 vector<int> &itemsOrigIndices )
+{
+	// this function is for reconstructing the original indices of items
+	// after some items were deleted from array, and the passed-in indices are for
+	// the NEW positions. We are interested to know the original positions
+	itemsOrigIndices.clear();
 
-  // first sort the two arrays
-  vector<int> removedItemsUse = removedItems;
-  vector<int> itemsNewUse = itemsNew;
-  SortIntVec(removedItemsUse);
-  SortIntVec(itemsNewUse);
+	// first sort the two arrays
+	vector<int> removedItemsUse = removedItems;
+	vector<int> itemsNewUse = itemsNew;
+	SortIntVec(removedItemsUse);
+	SortIntVec(itemsNewUse);
 
-  int posNew = 0;
-  for (int i = 0; i < (int)removedItemsUse.size(); ++i) {
-    //
-    // int posDel = removedItemsUse[i];
+	int posNew = 0;
+	for( int i=0; i<(int)removedItemsUse.size(); ++i )
+	{
+		//
+		//int posDel = removedItemsUse[i];
 
-    // output anything that is smaller or equal to this number
-    while (posNew < (int)itemsNewUse.size() &&
-           itemsNewUse[posNew] < removedItemsUse[i] - i) {
-      // convert it
-      itemsOrigIndices.push_back(itemsNewUse[posNew] + i);
-      posNew++;
-    }
+		// output every new index that is strictly smaller than this removed item's shifted position
+		while(  posNew <(int) itemsNewUse.size() &&  itemsNewUse[posNew] < removedItemsUse[i] - i )
+		{
+			// convert it
+			itemsOrigIndices.push_back( itemsNewUse[posNew] + i );
+			posNew++;
+		}
 
-    // stop if nothing left
-    if (posNew >= (int)itemsNewUse.size()) {
-      break;
-    }
-  }
-  // also output things left over
-  for (; posNew < (int)itemsNewUse.size(); ++posNew) {
-    //
-    itemsOrigIndices.push_back(itemsNewUse[posNew] + removedItemsUse.size());
-  }
-  // cout << "removedItems = ";
-  // DumpIntVec( removedItems );
-  // cout << "cur items = ";
-  // DumpIntVec(itemsNew);
-  // cout << "Converted items = ";
-  // DumpIntVec(  itemsOrigIndices );
-}
-
-void GetOrigPositionAfterRemoval(int numRemains,
-                                 const vector<int> &itemsRemoved,
-                                 vector<int> &origPosForRemains) {
-  // for now, choose a simple but NOT EFFICIENT way. TBD
-  // try to get original positions of the item removal from a list
-  // for example, say 3 items remains and itemsREmoved = 1,2 (0-based), then
-  // orig pos for remaings = 0, 3,4
-  set<int> setItemRemoved;
-  PopulateSetByVec(setItemRemoved, itemsRemoved);
-  set<int> setItemsOrig;
-  PopulateSetWithInterval(setItemsOrig, 0,
-                          numRemains + (int)itemsRemoved.size());
-  // substract something
-  SubtractSets(setItemsOrig, setItemRemoved);
-  // now result
-  PopulateVecBySet(origPosForRemains, setItemsOrig);
-
-  // for(int i=0; i<numRemains + (int)itemsRemoved.size(); ++i)
-  //{
-  //		if()
-  //	{
-  //	}
-  //}
-}
-
-void ConvOneSideToFullSplit(vector<int> &split, const set<int> &oneside,
-                            int numLeaves, int val) {
-  split.resize(numLeaves);
-  int val0 = 0;
-  if (val == 0) {
-    val0 = 1;
-  }
-  for (int i = 0; i < numLeaves; ++i) {
-    split[i] = val0;
-  }
-  for (set<int>::iterator it = oneside.begin(); it != oneside.end(); ++it) {
-    split[*it] = val;
-  }
+		// stop if nothing left
+		if( posNew >=(int)itemsNewUse.size() )
+		{
+			break;
+		}
+	}
+	// also output things left over
+	for( ; posNew < (int)itemsNewUse.size(); ++posNew )
+	{
+		//
+		itemsOrigIndices.push_back( itemsNewUse[posNew] + removedItemsUse.size() );
+	}
+//cout << "removedItems = ";
+//DumpIntVec( removedItems );
+//cout << "cur items = ";
+//DumpIntVec(itemsNew);
+//cout << "Converted items = ";
+//DumpIntVec(  itemsOrigIndices );
+}
+
+
+void GetOrigPositionAfterRemoval( int numRemains, const vector<int> &itemsRemoved,  vector<int> & origPosForRemains)
+{
+	// for now, choose a simple but NOT EFFICIENT way. TBD
+	// try to get original positions of the item removal from a list
+	// for example, say 3 items remain and itemsRemoved = 1,2 (0-based), then orig pos for the remaining = 0, 3,4
+	set<int> setItemRemoved;
+	PopulateSetByVec( setItemRemoved,  itemsRemoved );
+	set<int> setItemsOrig;
+	PopulateSetWithInterval( setItemsOrig, 0, numRemains + (int)itemsRemoved.size()  );
+	// subtract the removed items
+	SubtractSets(setItemsOrig, setItemRemoved);
+	// now result
+	PopulateVecBySet( origPosForRemains, setItemsOrig);
+
+	//for(int i=0; i<numRemains + (int)itemsRemoved.size(); ++i)
+	//{
+	//		if()
+	//	{
+	//	}
+	//}
+}
+
+void ConvOneSideToFullSplit( vector<int> &split, const set<int> &oneside, int numLeaves, int val )
+{
+	split.resize(numLeaves);
+	int val0 = 0;
+	if( val == 0 )
+	{
+		val0 = 1;
+	}
+	for(int i=0; i<numLeaves; ++i)
+	{
+		split[i] = val0;
+	}
+	for(set<int> :: iterator it = oneside.begin(); it != oneside.end(); ++it)
+	{
+		split[ *it ] = val;
+	}
 }
 
-bool AreTwoMVVecCompat(const vector<int> &vec1, const vector<int> &vec2,
-                       int &numTrueMatch) {
-  YW_ASSERT_INFO(vec1.size() == vec2.size(), "Fail");
-  numTrueMatch = 0;
-  int mres = 0;
-  for (int i = 0; i < (int)vec1.size(); ++i) {
-    if (IsMissingValueBit(vec1[i]) == true ||
-        IsMissingValueBit(vec2[i]) == true) {
-      // match
-      continue;
-    } else if (vec1[i] != vec2[i]) {
-      return false;
-    } else {
-      // true match
-      mres++;
-    }
-  }
-  numTrueMatch = mres;
-  return true;
+bool AreTwoMVVecCompat(const vector<int> &vec1, const vector<int> &vec2, int &numTrueMatch)
+{
+	YW_ASSERT_INFO( vec1.size() == vec2.size(), "Fail" );
+	numTrueMatch = 0;
+	int mres = 0;
+	for(int i=0; i<(int) vec1.size(); ++i)
+	{
+		if( IsMissingValueBit( vec1[i] ) == true || IsMissingValueBit( vec2[i] ) == true )
+		{
+			// match
+			continue;
+		}
+		else if( vec1[i]  != vec2[i]  )
+		{
+			return false;
+		}
+		else
+		{
+			// true match
+			mres ++;
+		}
+	}
+	numTrueMatch = mres;
+	return true;
 }
 
-int GetMVNum(const vector<int> &vec) {
-  int res = 0;
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    if (IsMissingValueBit(vec[i]) == true) {
-      res++;
-    }
-  }
-  return res;
+int GetMVNum(const vector<int> &vec)
+{
+	int res = 0;
+	for(int i=0; i<(int)vec.size(); ++i)
+	{
+		if(IsMissingValueBit( vec[i] ) == true )
+		{
+			res ++;
+		}
+	}
+	return res;
 }
 
-bool AreSeqsOverlap(const vector<int> &vec1, const vector<int> &vec2) {
-  for (int i = 0; i < (int)vec1.size(); ++i) {
-    if (IsMissingValueBit(vec1[i]) == false &&
-        IsMissingValueBit(vec2[i]) == false) {
-      return true;
-    }
-  }
-  return false;
+bool AreSeqsOverlap(const vector<int> &vec1, const vector<int> &vec2)
+{
+	for(int i=0; i<(int)vec1.size(); ++i)
+	{
+		if(IsMissingValueBit( vec1[i] ) == false  && IsMissingValueBit( vec2[i] ) == false )
+		{
+			return true;
+		}
+	}
+	return false;
 }
 
-void InsertOrderedVec(vector<int> &vec, int val) {
+void InsertOrderedVec( vector<int> &vec, int val)
+{
 #if 0
 	// assume vec is already ordered and we will add a new val to keep vec ordered
 	// IMPORTANT: remove duplicate copy if any
@@ -1009,27 +1215,30 @@ void InsertOrderedVec(vector<int> &vec, int val) {
 	vec = vecRes;
 #endif
 
-  if (vec.size() == 0) {
-    vec.push_back(val);
-    return;
-  }
+	if( vec.size() == 0)
+	{
+		vec.push_back( val );
+		return;
+	}
 
-  // cout << "In InsertOrderedVec: val = " << val << ", vec = ";
-  // DumpIntVec( vec );
-  // want to insert the item in space
-  // first find the location to add this item
-  // doing it in binary search
-  int pos = binary_search<int>(vec, 0, vec.size() - 1, val);
-  YW_ASSERT_INFO(pos >= 0, "Wrong in binary search");
-  // cout << "pos = " << pos << endl;
-  if (pos >= (int)vec.size() || val != vec[pos]) {
-    // need to add this item in. First shift one item to the right
-    vec.push_back(0);
-    for (int i = (int)vec.size() - 2; i >= pos; --i) {
-      vec[i + 1] = vec[i];
-    }
-    vec[pos] = val;
-  }
+//cout << "In InsertOrderedVec: val = " << val << ", vec = ";
+//DumpIntVec( vec );
+	// want to insert the item in space
+	// first find the location to add this item
+	// doing it in binary search
+	int pos = binary_search<int>(vec, 0, vec.size()-1, val);
+	YW_ASSERT_INFO( pos >= 0,  "Wrong in binary search");
+//cout << "pos = " << pos << endl;
+	if( pos >= (int) vec.size()  || val != vec[pos]  )
+	{
+		// need to add this item in. First shift one item to the right
+		vec.push_back(0);
+		for( int i=(int)vec.size()-2; i>= pos; --i )
+		{
+			vec[i+1] = vec[i];
+		}
+		vec[pos] = val;
+	}
 }
 
 //! \brief A recursive binary search using STL vectors
@@ -1039,318 +1248,336 @@ void InsertOrderedVec(vector<int> &vec, int val) {
 //! \param key The value being searched for
 //! \return The index into the vector where the value is located,
 //! or -1 if the value could not be found.
-template <typename T>
-int binary_search(const std::vector<T> &vec, unsigned start, unsigned end,
-                  const T &key) {
-  // Termination condition: start index greater than end index
-  if (start > end) {
-    return start;
-  }
+template<typename T>
+int binary_search(const std::vector<T>& vec, unsigned start, unsigned end, const T& key)
+{
+    // Termination condition: start index greater than end index
+    if(start > end)
+    {
+        return start;
+    }
 
-  // Find the middle element of the vector and use that for splitting
-  // the array into two pieces.
-  unsigned middle = (start + ((end - start) / 2));
+    // Find the middle element of the vector and use that for splitting
+    // the array into two pieces.
+    unsigned middle = (start + ((end - start) / 2));
 
-  if (vec[middle] == key) {
-    return middle;
-  } else if (vec[middle] > key) {
-    return binary_search(vec, start, middle - 1, key);
-  }
+    if(vec[middle] == key)
+    {
+        return middle;
+    }
+    else if(vec[middle] > key)
+    {
+        return binary_search(vec, start, middle - 1, key);
+    }
 
-  return binary_search(vec, middle + 1, end, key);
+    return binary_search(vec, middle + 1, end, key);
 }
 
-bool ReadIntListFromFile(const char *fname, vector<int> &listInts) {
-  // data input
-  ifstream inFile(fname);
-  if (!inFile) {
-    cout << "Can not open " << fname << endl;
-    return false;
-  }
-  listInts.clear();
-  while (inFile.eof() == false) {
-    const int BUF_SZ = 102400;
-    char buffer[BUF_SZ];
-    inFile.getline(buffer, BUF_SZ);
-    if (strlen(buffer) > 0) {
-      // cout << "buffer = " << buffer << endl;
-      int val;
-      sscanf(buffer, "%d", &val);
-      listInts.push_back(val);
-    }
-  }
+bool ReadIntListFromFile(const char *fname, vector<int> &listInts)
+{
+	// data input
+	ifstream inFile(fname);
+	if(!inFile)
+	{
+		cout << "Can not open "<< fname <<endl;
+		return false;
+	}
+	listInts.clear();
+	while( inFile.eof() == false )
+	{
+		const int BUF_SZ = 102400;
+		char buffer[BUF_SZ];
+		inFile.getline(buffer, BUF_SZ);
+		if( strlen(buffer) > 0 )
+		{
+//cout << "buffer = " << buffer << endl;
+			int val;
+			sscanf( buffer, "%d", &val );
+			listInts.push_back( val );
+		}
+	}
 
-  return true;
+	return true;
 }
 
-void GetVecPosNotInSet(const vector<int> &vec, const set<int> &s,
-                       vector<int> &posDiff) {
-  posDiff.clear();
-  //
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    if (s.find(vec[i]) == s.end()) {
-      posDiff.push_back(i);
-    }
-  }
+void GetVecPosNotInSet( const vector<int> &vec, const set<int> &s, vector<int> &posDiff )
+{
+	posDiff.clear();
+	//
+	for(int i=0; i<(int)vec.size(); ++i)
+	{
+		if( s.find( vec[i] ) == s.end() )
+		{
+			posDiff.push_back( i );
+		}
+	}
 }
 
 // Suppose we have g groups of (indistingishable) items and we want to
 // divide each group into numParts colors (distinguishable)
-// this support enumerate these choices. For example, we have two segments of 3
-// and 4 items each and we have two colors, then the choices will be: [(1,2),
-// (2,2)], or [(0.3),(1,3)]
-void InitPartitionEnum(const vector<int> &vecSegSizes, int numParts,
-                       vector<vector<int> > &parts) {
-  // start from each one as the first population type has all the ones in
-  // segment
-  parts.clear();
-  parts.resize(vecSegSizes.size());
-  for (int i = 0; i < (int)vecSegSizes.size(); ++i) {
-    parts[i].push_back(vecSegSizes[i]);
-    for (int j = 1; j < numParts; ++j) {
-      parts[i].push_back(0);
-    }
-    // cout << "InitPartitionEnum: part = ";
-    // DumpIntVec(parts[i]);
-  }
+// this supports enumerating these choices. For example, we have two segments of 3 and 4 items each
+// and we have two colors, then the choices will be: [(1,2), (2,2)], or [(0,3),(1,3)]
+void InitPartitionEnum( const vector<int> &vecSegSizes, int numParts, vector< vector<int> > &parts )
+{
+	// start from each one as the first population type has all the ones in segment
+	parts.clear();
+	parts.resize( vecSegSizes.size() );
+	for( int i=0; i<(int)vecSegSizes.size(); ++i )
+	{
+		parts[i].push_back( vecSegSizes[i] );
+		for(int j=1; j<numParts; ++j)
+		{
+			parts[i].push_back( 0 );
+		}
+//cout << "InitPartitionEnum: part = ";
+//DumpIntVec(parts[i]);
+	}
 }
 
-bool GetNextPartitionEnum(const vector<int> &vecSegSizes, int numParts,
-                          vector<vector<int> > &parts) {
-  // cout << "GetNextPartitionEnum: numParts = " << numParts << ", vecSegSizes =
-  // "; DumpIntVec(vecSegSizes);
-  // get next partition, return false if done
-  // first search for the part where we can change (by moving some item to the
-  // front)
-  YW_ASSERT_INFO(parts.size() == vecSegSizes.size(),
-                 "GetNextPartitionEnum: size mismatch");
-  int segChange = -1;
-  for (int seg = 0; seg < (int)vecSegSizes.size(); ++seg) {
-    YW_ASSERT_INFO((int)parts[seg].size() == numParts,
-                   "GetNextPartitionEnum: seg size mismatch");
-    // when the part has concerntrated to the last population, this is the sign
-    // that this part has changed it partiton to its limit
-    if (parts[seg][numParts - 1] != vecSegSizes[seg]) {
-      segChange = seg;
-      break;
-    }
-  }
-  if (segChange < 0) {
-    // done
-    return false;
-  }
-  // cout << "segChange = " << segChange << endl;
-  //
-  vector<vector<int> > partsNew = parts;
-  // the first segments before this seg is re-set
-  for (int s = 0; s < segChange; ++s) {
-    partsNew[s][0] = vecSegSizes[s];
-    for (int j = 1; j < numParts; ++j) {
-      partsNew[s][j] = 0;
-    }
-  }
-  // then segChange one gets shift by one
-  // this is done by finding the least numbered population and
-  // move it out to one larger AND concerntrate all the ones up to this point to
-  // the first position
-  int pp = -1;
-  // int numItemsToi = 0;
-  for (int i = 0; i < numParts; ++i) {
-    if (parts[segChange][i] > 0) {
-      pp = i;
-      break;
-    }
-  }
-  // cout << "pp = " << pp << endl;
-  YW_ASSERT_INFO(pp >= 0 && pp < numParts - 1, "Can not be true");
-  vector<int> segNew = parts[segChange];
-  segNew[0] = parts[segChange][pp] - 1;
-  if (pp != 0) {
-    segNew[pp] = 0;
-  }
-  segNew[pp + 1]++;
-
-  partsNew[segChange] = segNew;
-
-  // the rest remain the same
-  parts = partsNew;
-  // cout << "Next parts id = \n";
-  // for(int i=0;i<(int)parts.size(); ++i)
-  //{
-  // DumpIntVec( parts[i] );
-  //}
-  return true;
-}
-
-int GetPartEnumIndex(const vector<int> &vecSegSizes, int numParts,
-                     const vector<vector<int> > &parts) {
-  // get the index (order in the enumerated list) of the given enumerated
-  // partition cout from the right hand side
-  YW_ASSERT_INFO(vecSegSizes.size() == parts.size(),
-                 "GetPartEnumIndex: size wrong");
-  int res = 0;
-  for (int i = (int)vecSegSizes.size() - 1; i >= 0; --i) {
-    if (i < (int)vecSegSizes.size() - 1) {
-      res *= GetPartitionEnumNum(vecSegSizes[i], numParts);
-    }
-    res += GetPartitionEnumId(vecSegSizes[i], parts[i]);
-  }
-  return res;
+bool GetNextPartitionEnum( const vector<int> &vecSegSizes, int numParts, vector< vector<int> > &parts )
+{
+//cout << "GetNextPartitionEnum: numParts = " << numParts << ", vecSegSizes = ";
+//DumpIntVec(vecSegSizes);
+	// get next partition, return false if done
+	// first search for the part where we can change (by moving some item to the front)
+	YW_ASSERT_INFO( parts.size() ==  vecSegSizes.size(), "GetNextPartitionEnum: size mismatch" );
+	int segChange = -1;
+	for(int seg=0; seg<(int)vecSegSizes.size(); ++seg)
+	{
+		YW_ASSERT_INFO( (int)parts[seg].size() ==  numParts, "GetNextPartitionEnum: seg size mismatch" );
+		// when all items of the part are concentrated in the last population, this is the sign that this part has changed its partition to its limit
+		if( parts[seg][numParts-1] != vecSegSizes[seg]  )
+		{
+			segChange = seg;
+			break;
+		}
+	}
+	if( segChange < 0 )
+	{
+		// done
+		return false;
+	}
+//cout << "segChange = " << segChange << endl;
+	//
+	vector< vector<int> > partsNew = parts;
+	// the first segments before this seg is re-set
+	for(int s=0; s<segChange; ++s)
+	{
+		partsNew[s][0] = vecSegSizes[s];
+		for(int j=1; j<numParts; ++j)
+		{
+			partsNew[s][j] =  0 ;
+		}
+	}
+	// then the segment at segChange gets shifted by one
+	// this is done by finding the lowest-numbered population with a non-zero count,
+	// moving one of its items to the next population AND concentrating the rest of its items at the first position
+	int pp=-1;
+	//int numItemsToi = 0;
+	for(int i=0; i<numParts; ++i)
+	{
+		if( parts[segChange][i] > 0 )
+		{
+			pp = i;
+			break;
+		}
+	}
+//cout << "pp = " << pp << endl;
+	YW_ASSERT_INFO( pp >= 0 && pp < numParts-1, "Can not be true" );
+	vector<int> segNew = parts[segChange];
+	segNew[0] = parts[segChange][pp]-1;
+	if( pp != 0 )
+	{
+		segNew[pp] = 0;
+	}
+	segNew[pp+1] ++;
+
+	partsNew[segChange] = segNew;
+
+	// the rest remain the same
+	parts = partsNew;
+//cout << "Next parts id = \n";
+//for(int i=0;i<(int)parts.size(); ++i)
+//{
+//DumpIntVec( parts[i] );
+//}
+	return true;
+}
+
+int GetPartEnumIndex( const vector<int> &vecSegSizes, int numParts, const vector< vector<int> > &parts )
+{
+	// get the index (order in the enumerated list) of the given enumerated partition
+	// count from the right hand side
+	YW_ASSERT_INFO(vecSegSizes.size() == parts.size(), "GetPartEnumIndex: size wrong");
+	int res = 0;
+	for(int i= (int)vecSegSizes.size()-1; i>=0; --i)
+	{
+		if( i < (int)vecSegSizes.size()-1 )
+		{
+			res *= GetPartitionEnumNum( vecSegSizes[i], numParts  );
+		}
+		res += GetPartitionEnumId( vecSegSizes[i], parts[i] );
+	}
+	return res;
 }
 
+
 // Now allow chaing parts num
-void InitPartitionEnumVar(const vector<int> &vecSegSizes,
-                          const vector<int> &listNumParts,
-                          vector<vector<int> > &parts) {
-  // start from each one as the first population type has all the ones in
-  // segment
-  YW_ASSERT_INFO(vecSegSizes.size() == listNumParts.size(), "Mismatch");
-  parts.clear();
-  parts.resize(vecSegSizes.size());
-  for (int i = 0; i < (int)vecSegSizes.size(); ++i) {
-    parts[i].push_back(vecSegSizes[i]);
-    for (int j = 1; j < listNumParts[i]; ++j) {
-      parts[i].push_back(0);
-    }
-    // cout << "InitPartitionEnum: part = ";
-    // DumpIntVec(parts[i]);
-  }
+void InitPartitionEnumVar( const vector<int> &vecSegSizes, const vector<int> &listNumParts, vector< vector<int> > &parts )
+{
+	// start from each one as the first population type has all the ones in segment
+    YW_ASSERT_INFO(vecSegSizes.size() == listNumParts.size(), "Mismatch");
+	parts.clear();
+	parts.resize( vecSegSizes.size() );
+	for( int i=0; i<(int)vecSegSizes.size(); ++i )
+	{
+		parts[i].push_back( vecSegSizes[i] );
+		for(int j=1; j<listNumParts[i]; ++j)
+		{
+			parts[i].push_back( 0 );
+		}
+        //cout << "InitPartitionEnum: part = ";
+        //DumpIntVec(parts[i]);
+	}
+}
+
+bool GetNextPartitionEnumVar( const vector<int> &vecSegSizes, const vector<int> &listNumParts, vector< vector<int> > &parts )
+{
+//cout << "GetNextPartitionEnumVar: vecSegSizes = ";
+//DumpIntVec(vecSegSizes);
+//cout << "listNumparts: ";
+//DumpIntVec(listNumParts);
+//cout << "parts: ";
+//DumpVecSequences(parts);
+    YW_ASSERT_INFO(vecSegSizes.size() == listNumParts.size(), "Mismatch");
+    //cout << "GetNextPartitionEnum: numParts = " << numParts << ", vecSegSizes = ";
+    //DumpIntVec(vecSegSizes);
+	// get next partition, return false if done
+	// first search for the part where we can change (by moving some item to the front)
+	YW_ASSERT_INFO( parts.size() ==  vecSegSizes.size(), "GetNextPartitionEnum: size mismatch" );
+	int segChange = -1;
+	for(int seg=0; seg<(int)vecSegSizes.size(); ++seg)
+	{
+		YW_ASSERT_INFO( (int)parts[seg].size() ==  listNumParts[seg], "GetNextPartitionEnum: seg size mismatch" );
+		// when all items of the part are concentrated in the last population, this is the sign that this part has changed its partition to its limit
+		if( parts[seg][listNumParts[seg]-1] != vecSegSizes[seg]  )
+		{
+			segChange = seg;
+			break;
+		}
+	}
+	if( segChange < 0 )
+	{
+		// done
+//cout << "Done\n";
+		return false;
+	}
+//cout << "segChange = " << segChange << endl;
+	//
+	vector< vector<int> > partsNew = parts;
+	// the first segments before this seg is re-set
+	for(int s=0; s<segChange; ++s)
+	{
+		partsNew[s][0] = vecSegSizes[s];
+		for(int j=1; j<listNumParts[s]; ++j)
+		{
+			partsNew[s][j] =  0 ;
+		}
+	}
+	// then the segment at segChange gets shifted by one
+	// this is done by finding the lowest-numbered population with a non-zero count,
+	// moving one of its items to the next population AND concentrating the rest of its items at the first position
+	int pp=-1;
+	//int numItemsToi = 0;
+	for(int i=0; i<listNumParts[segChange]; ++i)
+	{
+		if( parts[segChange][i] > 0 )
+		{
+			pp = i;
+			break;
+		}
+	}
+//cout << "pp = " << pp << endl;
+	YW_ASSERT_INFO( pp >= 0 && pp < listNumParts[segChange]-1, "Can not be true" );
+	vector<int> segNew = parts[segChange];
+	segNew[0] = parts[segChange][pp]-1;
+	if( pp != 0 )
+	{
+		segNew[pp] = 0;
+	}
+	segNew[pp+1] ++;
+
+	partsNew[segChange] = segNew;
+
+	// the rest remain the same
+	parts = partsNew;
+//cout << "Next parts id = \n";
+//for(int i=0;i<(int)parts.size(); ++i)
+//{
+//DumpIntVec( parts[i] );
+//}
+	return true;
 }
 
-bool GetNextPartitionEnumVar(const vector<int> &vecSegSizes,
-                             const vector<int> &listNumParts,
-                             vector<vector<int> > &parts) {
-  // cout << "GetNextPartitionEnumVar: vecSegSizes = ";
-  // DumpIntVec(vecSegSizes);
-  // cout << "listNumparts: ";
-  // DumpIntVec(listNumParts);
-  // cout << "parts: ";
-  // DumpVecSequences(parts);
-  YW_ASSERT_INFO(vecSegSizes.size() == listNumParts.size(), "Mismatch");
-  // cout << "GetNextPartitionEnum: numParts = " << numParts << ", vecSegSizes =
-  // "; DumpIntVec(vecSegSizes);
-  // get next partition, return false if done
-  // first search for the part where we can change (by moving some item to the
-  // front)
-  YW_ASSERT_INFO(parts.size() == vecSegSizes.size(),
-                 "GetNextPartitionEnum: size mismatch");
-  int segChange = -1;
-  for (int seg = 0; seg < (int)vecSegSizes.size(); ++seg) {
-    YW_ASSERT_INFO((int)parts[seg].size() == listNumParts[seg],
-                   "GetNextPartitionEnum: seg size mismatch");
-    // when the part has concerntrated to the last population, this is the sign
-    // that this part has changed it partiton to its limit
-    if (parts[seg][listNumParts[seg] - 1] != vecSegSizes[seg]) {
-      segChange = seg;
-      break;
-    }
-  }
-  if (segChange < 0) {
-    // done
-    // cout << "Done\n";
-    return false;
-  }
-  // cout << "segChange = " << segChange << endl;
-  //
-  vector<vector<int> > partsNew = parts;
-  // the first segments before this seg is re-set
-  for (int s = 0; s < segChange; ++s) {
-    partsNew[s][0] = vecSegSizes[s];
-    for (int j = 1; j < listNumParts[s]; ++j) {
-      partsNew[s][j] = 0;
-    }
-  }
-  // then segChange one gets shift by one
-  // this is done by finding the least numbered population and
-  // move it out to one larger AND concerntrate all the ones up to this point to
-  // the first position
-  int pp = -1;
-  // int numItemsToi = 0;
-  for (int i = 0; i < listNumParts[segChange]; ++i) {
-    if (parts[segChange][i] > 0) {
-      pp = i;
-      break;
-    }
-  }
-  // cout << "pp = " << pp << endl;
-  YW_ASSERT_INFO(pp >= 0 && pp < listNumParts[segChange] - 1,
-                 "Can not be true");
-  vector<int> segNew = parts[segChange];
-  segNew[0] = parts[segChange][pp] - 1;
-  if (pp != 0) {
-    segNew[pp] = 0;
-  }
-  segNew[pp + 1]++;
-
-  partsNew[segChange] = segNew;
-
-  // the rest remain the same
-  parts = partsNew;
-  // cout << "Next parts id = \n";
-  // for(int i=0;i<(int)parts.size(); ++i)
-  //{
-  // DumpIntVec( parts[i] );
-  //}
-  return true;
-}
-
-int GetPartEnumIndexVar(const vector<int> &vecSegSizes,
-                        const vector<int> &listNumParts,
-                        const vector<vector<int> > &parts) {
-  YW_ASSERT_INFO(vecSegSizes.size() == listNumParts.size(), "Mismatch");
-  // get the index (order in the enumerated list) of the given enumerated
-  // partition cout from the right hand side
-  YW_ASSERT_INFO(vecSegSizes.size() == parts.size(),
-                 "GetPartEnumIndex: size wrong");
-  int res = 0;
-  for (int i = (int)vecSegSizes.size() - 1; i >= 0; --i) {
-    if (i < (int)vecSegSizes.size() - 1) {
-      res *= GetPartitionEnumNum(vecSegSizes[i], listNumParts[i]);
-    }
-    res += GetPartitionEnumId(vecSegSizes[i], parts[i]);
-  }
-  return res;
+int GetPartEnumIndexVar( const vector<int> &vecSegSizes, const vector<int> &listNumParts, const vector< vector<int> > &parts )
+{
+    YW_ASSERT_INFO(vecSegSizes.size() == listNumParts.size(), "Mismatch");
+	// get the index (order in the enumerated list) of the given enumerated partition
+	// cout from the right hand side
+	YW_ASSERT_INFO(vecSegSizes.size() == parts.size(), "GetPartEnumIndex: size wrong");
+	int res = 0;
+	for(int i= (int)vecSegSizes.size()-1; i>=0; --i)
+	{
+		if( i < (int)vecSegSizes.size()-1 )
+		{
+			res *= GetPartitionEnumNum( vecSegSizes[i], listNumParts[i]  );
+		}
+		res += GetPartitionEnumId( vecSegSizes[i], parts[i] );
+	}
+	return res;
 }
 
 // **************************************************************************************
 // code for enumearing partitions (based on Ruhua's code)
-// hereis the pre-initied enumeration, format: <num of lins, num of color>,
-// enumeration
-static map<pair<int, int>, vector_vector_t> mapEnumeratedPartitions;
-
-int GetPartitionEnumNum(int n, int numSPop) {
-  // cout << "GetPartitionEnumNum: n = " << n << ", numSPop = " << numSPop;
-  if (numSPop == 0) {
-    return 0;
-  }
-  // how many number of partitons of identical balls into p colors
-  double resd = 1.0;
-  for (int j = 1; j <= numSPop - 1; ++j) {
-    resd *= (1.0 * (n + numSPop - j)) / j;
-  }
-  int res = (int)(resd);
-  // cout << ", res = " << res << endl;
-  return res;
-}
-
-int GetPartitionEnumId(int numItemsTot, const vector<int> &vec) {
-  int numColor = vec.size();
-  // cout << "numItesmTotl: " << numItemsTot << ", numColor = " << numColor <<
-  // ", vec = "; DumpIntVec( vec );
-  pair<int, int> pp(numItemsTot, numColor);
-  bool fExist =
-      mapEnumeratedPartitions.find(pp) != mapEnumeratedPartitions.end();
-  if (fExist == false) {
-    vector_vector_t tt;
-    mapEnumeratedPartitions.insert(
-        map<pair<int, int>, vector_vector_t>::value_type(pp, tt));
-  }
-  int res = -1;
-  convert_vector_to_index(fExist, vec, res, mapEnumeratedPartitions[pp]);
-  YW_ASSERT_INFO(res >= 0, "Fail in GetPartitioId");
-  // cout << "parition id: " << res  << ",numItemsTot: " << numItemsTot << ",
-  // vec = "; DumpIntVec( vec );
-  return res;
+// hereis the pre-initied enumeration, format: <num of lins, num of color>, enumeration
+static map<pair<int,int>, vector_vector_t> mapEnumeratedPartitions;
+
+int GetPartitionEnumNum( int n, int numSPop )
+{
+//cout << "GetPartitionEnumNum: n = " << n << ", numSPop = " << numSPop;
+    if(numSPop == 0 )
+    {
+        return 0;
+    }
+	// how many number of partitons of identical balls into p colors
+	double resd = 1.0;
+	for(int j=1; j<= numSPop- 1; ++j)
+	{
+		resd *= (1.0*(n+numSPop-j))/j;
+	}
+    int res =(int)(resd);
+//cout << ", res = " << res << endl;
+	return res;
+}
+
+int GetPartitionEnumId( int numItemsTot, const vector<int> &vec )
+{
+    int numColor = vec.size();
+//cout << "numItesmTotl: " << numItemsTot << ", numColor = " << numColor << ", vec = ";
+//DumpIntVec( vec );
+    pair<int,int> pp(numItemsTot, numColor);
+    bool fExist = mapEnumeratedPartitions.find(pp) != mapEnumeratedPartitions.end();
+    if( fExist == false )
+    {
+        vector_vector_t tt;
+        mapEnumeratedPartitions.insert( map<pair<int,int>,vector_vector_t> :: value_type(pp, tt) );
+    }
+    int res = -1;
+    convert_vector_to_index( fExist, vec, res, mapEnumeratedPartitions[pp] );
+    YW_ASSERT_INFO(res >= 0, "Fail in GetPartitioId");
+//cout << "parition id: " << res  << ",numItemsTot: " << numItemsTot << ", vec = ";
+//DumpIntVec( vec );
+    return res;
 
 #if 0
 	// for this enumerated vector, where does it stand in the enumeration order?
@@ -1405,23 +1632,20 @@ int GetPartitionEnumId(int numItemsTot, const vector<int> &vec) {
 #endif
 }
 
-void GetPartitionEnumPartForId(int numItemsTot, int numParts, int eid,
-                               vector<int> &vecres) {
-  pair<int, int> pp(numItemsTot, numParts);
-  bool fExist =
-      mapEnumeratedPartitions.find(pp) != mapEnumeratedPartitions.end();
-  if (fExist == false) {
-    vector_vector_t tt;
-    mapEnumeratedPartitions.insert(
-        map<pair<int, int>, vector_vector_t>::value_type(pp, tt));
-  }
-  convert_index_to_vector(fExist, numParts, numItemsTot, eid, vecres,
-                          mapEnumeratedPartitions[pp]);
-  // YW_ASSERT_INFO(vecres.size() >= 0, "Fail in GetPartitionEnumPartForId");
+void GetPartitionEnumPartForId( int numItemsTot, int numParts, int eid, vector<int> &vecres )
+{
+    pair<int,int> pp(numItemsTot, numParts);
+    bool fExist = mapEnumeratedPartitions.find(pp) != mapEnumeratedPartitions.end();
+    if( fExist == false )
+    {
+        vector_vector_t tt;
+        mapEnumeratedPartitions.insert( map<pair<int,int>,vector_vector_t> :: value_type(pp, tt) );
+    }
+    convert_index_to_vector( fExist, numParts, numItemsTot, eid, vecres, mapEnumeratedPartitions[pp] );
+    //YW_ASSERT_INFO(vecres.size() >= 0, "Fail in GetPartitionEnumPartForId");
 
-  // cout << "ConvPartition to id: parition id: " << eid  << ",numItemsTot: " <<
-  // numItemsTot << ", numParts: " << numParts << ", vecres = "; DumpIntVec(
-  // vecres );
+//cout << "ConvPartition to id: parition id: " << eid  << ",numItemsTot: " << numItemsTot << ", numParts: " << numParts << ", vecres = ";
+//DumpIntVec( vecres );
 
 #if 0
 
@@ -1483,36 +1707,33 @@ void GetPartitionEnumPartForId(int numItemsTot, int numParts, int eid,
 }
 
 // **************************************************************************************
-void MoveOneItemInPartEnum(const vector<vector<int> > &partsSrc, int part,
-                           int psrc, int pdest,
-                           vector<vector<int> > &partsDest) {
-  YW_ASSERT_INFO(partsSrc.size() > 0, "MoveOneItemInPartEnum: wrong1");
-  YW_ASSERT_INFO(part < (int)partsSrc.size(), "MoveOneItemInPartEnum: wrong2");
-  YW_ASSERT_INFO(psrc < (int)partsSrc[0].size() &&
-                     pdest < (int)partsSrc[0].size(),
-                 "MoveOneItemInPartEnum: wrong3");
-  partsDest = partsSrc;
-  partsDest[part][psrc]--;
-  partsDest[part][pdest]++;
-}
-
-void ConvIndexToPartEnum(const vector<int> &vecSegSizes, int numParts,
-                         int pIndex, vector<vector<int> > &parts) {
-  // convert the index of enumeration to a real enumeration
-  // parts.clear();
-
-  // it would be nice to implement it, but there is clear use yet. so skip
-  // YW_ASSERT_INFO(false, "Not implemented yet. TBD.");
-  vector<int> listSizes;
-  for (int i = 0; i < (int)vecSegSizes.size(); ++i) {
-    listSizes.push_back(numParts);
-  }
-  ConvIndexToPartEnumVar(vecSegSizes, listSizes, pIndex, parts);
+void MoveOneItemInPartEnum( const vector< vector<int> > &partsSrc, int part, int psrc, int pdest, vector< vector<int> > &partsDest )
+{
+	YW_ASSERT_INFO( partsSrc.size()>0, "MoveOneItemInPartEnum: wrong1" );
+	YW_ASSERT_INFO( part < (int)partsSrc.size(), "MoveOneItemInPartEnum: wrong2" );
+	YW_ASSERT_INFO( psrc < (int)partsSrc[0].size() && pdest < (int)partsSrc[0].size(), "MoveOneItemInPartEnum: wrong3" );
+	partsDest = partsSrc;
+	partsDest[part][psrc] --;
+	partsDest[part][pdest] ++;
+}
+
+void ConvIndexToPartEnum(const vector<int> &vecSegSizes, int numParts, int pIndex, vector< vector<int> > &parts)
+{
+	// convert the index of enumeration to a real enumeration
+	//parts.clear();
+
+	// it would be nice to implement it, but there is clear use yet. so skip
+	//YW_ASSERT_INFO(false, "Not implemented yet. TBD.");
+    vector<int> listSizes;
+    for(int i=0; i<(int)vecSegSizes.size(); ++i)
+    {
+        listSizes.push_back(numParts);
+    }
+    ConvIndexToPartEnumVar( vecSegSizes, listSizes, pIndex, parts );
 }
 
-void ConvIndexToPartEnumVar(const vector<int> &vecSegSizes,
-                            const vector<int> &listNumParts, int pIndex,
-                            vector<vector<int> > &parts) {
+void ConvIndexToPartEnumVar(const vector<int> &vecSegSizes, const vector<int> &listNumParts, int pIndex, vector< vector<int> > &parts)
+{
 #if 0
 cout << "ConvIndexToPartEnumVar: vecSegSizes: ";
 DumpIntVec(vecSegSizes);
@@ -1520,241 +1741,248 @@ cout << "ListNumParts: ";
 DumpIntVec(listNumParts);
 cout << "pindex: " << pIndex << endl;
 #endif
-  //
-  YW_ASSERT_INFO(vecSegSizes.size() == listNumParts.size(), "Mismatch");
-  // get the index (order in the enumerated list) of the given enumerated
-  // partition cout from the right hand side
-  parts.clear();
+    //
+    YW_ASSERT_INFO(vecSegSizes.size() == listNumParts.size(), "Mismatch");
+	// get the index (order in the enumerated list) of the given enumerated partition
+	// cout from the right hand side
+    parts.clear();
 
-  int res = pIndex;
-  for (int i = 0; i < (int)vecSegSizes.size(); ++i) {
-    int totEnumNumStep = GetPartitionEnumNum(vecSegSizes[i], listNumParts[i]);
-    int idStep = (res % totEnumNumStep);
+	int res = pIndex;
+    for(int i=0; i<(int)vecSegSizes.size(); ++i )
+    {
+        int totEnumNumStep = GetPartitionEnumNum( vecSegSizes[i], listNumParts[i] );
+        int idStep = ( res % totEnumNumStep );
 
-    vector<int> partsStep;
-    GetPartitionEnumPartForId(vecSegSizes[i], listNumParts[i], idStep,
-                              partsStep);
-    parts.push_back(partsStep);
+        vector<int> partsStep;
+        GetPartitionEnumPartForId( vecSegSizes[i], listNumParts[i], idStep, partsStep );
+        parts.push_back(partsStep);
 
-    // reduce res
-    res = (res - idStep) / totEnumNumStep;
+        // reduce res
+        res = (res - idStep)/totEnumNumStep;
 
-    // cout << "idStep: " << idStep << ", partsStep: ";
-    // DumpIntVec(partsStep);
-  }
+//cout << "idStep: " << idStep << ", partsStep: ";
+//DumpIntVec(partsStep);
+    }
 }
 
-void AddIntVec(vector<int> &vecDest, const vector<int> &vecSrc) {
-  YW_ASSERT_INFO(vecDest.size() == vecSrc.size(), "AddIntVec: size mismatch");
-  for (int i = 0; i < (int)vecSrc.size(); ++i) {
-    vecDest[i] += vecSrc[i];
-  }
+
+void AddIntVec( vector<int> &vecDest, const vector<int> &vecSrc)
+{
+	YW_ASSERT_INFO( vecDest.size() == vecSrc.size(), "AddIntVec: size mismatch" );
+	for(int i=0; i<(int)vecSrc.size(); ++i)
+	{
+		vecDest[i] += vecSrc[i];
+	}
 }
 
-void SubtractIntVec(vector<int> &vecDest, const vector<int> &vecSubtracted) {
-  //
-  YW_ASSERT_INFO(vecDest.size() == vecSubtracted.size(),
-                 "AddIntVec: size mismatch");
-  for (int i = 0; i < (int)vecSubtracted.size(); ++i) {
-    vecDest[i] -= vecSubtracted[i];
-  }
+void SubtractIntVec( vector<int> &vecDest, const vector<int> &vecSubtracted )
+{
+    //
+    YW_ASSERT_INFO( vecDest.size() == vecSubtracted.size(), "AddIntVec: size mismatch" );
+	for(int i=0; i<(int)vecSubtracted.size(); ++i)
+	{
+		vecDest[i] -= vecSubtracted[i];
+	}
 }
 
-void GetItemsInRange(const set<int> &items, int lb, int ub, set<int> &sset) {
-  sset.clear();
-  //
-  for (set<int>::iterator it = items.begin(); it != items.end(); ++it) {
-    if (*it >= lb && *it <= ub) {
-      sset.insert(*it);
-    }
-  }
+void GetItemsInRange( const set<int> &items, int lb, int ub, set<int> &sset )
+{
+	sset.clear();
+	//
+	for(set<int> :: iterator it = items.begin(); it != items.end(); ++it)
+	{
+		if( *it >= lb && *it <= ub)
+		{
+			sset.insert(*it);
+		}
+	}
 }
 
-void InitRandom(int seed) {
-  double randTmp = GetRandFraction();
-  cout << "Get one random fraction: " << randTmp
-       << ", then initialize random seed to " << seed << endl;
-  srand(seed);
-}
-void PermuatePseudoRandomVec(vector<int> &vecPerm) {
-  // take a simple strategy: pick two arbitary positions and exchange them
-  int numRounds = vecPerm.size();
-  int vecLen = vecPerm.size();
-  for (int r = 0; r < numRounds; ++r) {
-    int i = (int)((rand() * 1.0 / RAND_MAX) * vecLen);
-    int j = (int)((rand() * 1.0 / RAND_MAX) * vecLen);
-    // int i = (int) (vecLen * GetRandFraction() );
-    // int j = (int) (vecLen * GetRandFraction() );
-    int tmp = vecPerm[i];
-    vecPerm[i] = vecPerm[j];
-    vecPerm[j] = tmp;
-  }
+void InitRandom(int seed)
+{
+    double randTmp = GetRandFraction();
+    cout << "Get one random fraction: " << randTmp << ", then initialize random seed to " << seed << endl;
+	srand(seed);
+}
+void PermuatePseudoRandomVec( vector<int> &vecPerm )
+{
+	// take a simple strategy: pick two arbitary positions and exchange them
+	int numRounds = vecPerm.size();
+	int vecLen = vecPerm.size();
+	for( int r=0; r<numRounds; ++r )
+	{
+		int i= (int) ( (rand()*1.0/RAND_MAX) * vecLen);
+		int j= (int) ( ( rand()*1.0/RAND_MAX) * vecLen);
+		//int i = (int) (vecLen * GetRandFraction() );
+		//int j = (int) (vecLen * GetRandFraction() );
+		int tmp = vecPerm[i];
+		vecPerm[i] = vecPerm[j];
+		vecPerm[j] = tmp;
+	}
 }
 
-void UnionMultiset(multiset<int> &setUpdate, const multiset<int> &setAdded) {
-  for (multiset<int>::iterator it = setAdded.begin(); it != setAdded.end();
-       ++it) {
-    setUpdate.insert(*it);
-  }
+void UnionMultiset(multiset<int> &setUpdate, const multiset<int> &setAdded)
+{
+	for(multiset<int> :: iterator it=setAdded.begin(); it!=setAdded.end(); ++it)
+	{
+		setUpdate.insert(*it);
+	}
 }
 
-void JoinMultiset(const multiset<int> &set1, const multiset<int> &set2,
-                  multiset<int> &setInt) {
-  for (multiset<int>::iterator it = set1.begin(); it != set1.end(); ++it) {
-    if (set2.find(*it) != set2.end()) {
-      setInt.insert(*it);
-    }
-  }
+void JoinMultiset(const multiset<int> &set1, const multiset<int> &set2, multiset<int> &setInt)
+{
+	for( multiset<int> :: iterator it=set1.begin(); it!=set1.end(); ++it )
+	{
+		if( set2.find(*it) != set2.end() )
+		{
+			setInt.insert(*it);
+		}
+	}
 }
 
-void ConvMSetToSet(const multiset<int> &mset, set<int> &ss) {
-  ss.clear();
-  for (multiset<int>::iterator it = mset.begin(); it != mset.end(); ++it) {
-    ss.insert(*it);
-  }
+void ConvMSetToSet(const multiset<int> &mset, set<int> &ss)
+{
+	ss.clear();
+	for( multiset<int> :: iterator it = mset.begin(); it != mset.end(); ++it)
+	{
+		ss.insert(*it);
+	}
 }
 
-void DumpMultiset(const multiset<int> &mset) {
-  for (multiset<int>::iterator it = mset.begin(); it != mset.end(); ++it) {
-    cout << *it << "    ";
-  }
-  cout << endl;
+void DumpMultiset(const multiset<int> &mset)
+{
+	for( multiset<int> :: iterator it = mset.begin(); it != mset.end(); ++it)
+	{
+		cout << *it << "    ";
+	}
+	cout << endl;
 }
 
-int CalcNumNChooseK(int n, int k) {
-  // how many ways to choose k items from n items
-  YW_ASSERT_INFO(n >= k, "n must be no smaller than k");
-  double res = 1.0;
-  int kuse = k;
-  if (n - k < kuse) {
-    kuse = n - k;
-  }
-  for (int i = 0; i < kuse; ++i) {
-    res *= (1.0 * (n - i)) / (i + 1);
-  }
-  return (int)res;
+int CalcNumNChooseK(int n, int k)
+{
+	// how many ways to choose k items from n items
+	YW_ASSERT_INFO( n >=k, "n must be no smaller than k" );
+	double res = 1.0;
+	int kuse = k;
+	if( n-k < kuse)
+	{
+		kuse = n-k;
+	}
+	for(int i=0; i<kuse; ++i)
+	{
+		res *= (1.0*(n-i))/(i+1);
+	}
+	return (int)res;
 }
 
 // the following two functions are used to enumerate all partitions
 
-void InitSubsetPartitionEnum(int numItems, int numParts,
-                             vector<vector<int> > &parts) {
-  int n = numItems;
-  int p = numParts;
+void InitSubsetPartitionEnum(int numItems, int numParts, vector< vector<int> > &parts ) {
+  int n =numItems;
+  int p =numParts;
   parts.clear();
   parts.push_back(vector<int>());
-  for (int i = 0; i <= n - p; i++) {
+  for (int i=0;i<=n-p;i++) {
     parts[0].push_back(i);
   }
-  for (int i = n - p + 1; i <= n - 1; i++) {
-    parts.push_back(vector<int>());
-    parts[parts.size() - 1].push_back(i);
-  }
-}
-bool GetNextSubsetPartitionEnum(int numItems, int numParts,
-                                vector<vector<int> > &parts) {
-  // assuming all the elements in @parts is distinct and the number of these
-  // elements is @numItems
-  int n = numItems;
-  int p = numParts;
-  if (((int)parts.size()) != p)
-    return false;
-  for (int i = 0; i < (int)parts.size(); i++) {
-    if (parts[i].empty())
-      return false;
-    sort(parts[i].begin(), parts[i].end());
-  }
-  vector<int> M;
-  vector<int> K;
-  M.reserve(n);
-  K.reserve(n);
-  for (int i = 0; i < n; i++) {
-    M.push_back(0);
-    K.push_back(0);
-  }
-  int lastmin = -1;
-  for (int i = 0; i < (int)parts.size(); ++i) {
-    int mmin = n;
-    int key = -1;
-    for (int j = 0; j < (int)parts.size(); ++j) {
-      if (parts[j][0] > lastmin && parts[j][0] < mmin) {
-        key = j;
-        mmin = parts[j][0];
-      }
-    }
-    lastmin = mmin;
-    for (int j = 0; j < (int)parts[key].size(); ++j) {
-      K[parts[key][j]] = i;
-    }
-  }
-  M[0] = K[0];
-  for (int i = 1; i < n; i++) {
-    if (K[i] > M[i - 1])
-      M[i] = K[i];
-    else
-      M[i] = M[i - 1];
-  }
-
-  bool success = false;
-  for (int i = n - 1; i >= 1; --i) {
-    if (K[i] < p - 1 && K[i] <= M[i - 1]) {
-      success = true;
-      K[i] = K[i] + 1;
-      if (K[i] > M[i])
-        M[i] = K[i];
-      for (int j = i + 1; j <= n - (p - M[i]); ++j) {
-        K[j] = 0;
-        M[j] = M[i];
-      }
-      for (int j = n - (p - M[i]) + 1; j <= n - 1; ++j) {
-        K[j] = p - (n - j);
-        M[j] = p - (n - j);
-      }
-      break;
-    }
-  }
-  if (!success)
-    return false;
-  parts.clear();
-  for (int i = 0; i < p; i++) {
+  for (int i=n-p+1;i<=n-1;i++) {
     parts.push_back(vector<int>());
-  }
-  for (int i = 0; i < n; i++) {
-    parts[K[i]].push_back(i);
-  }
-  return true;
-}
-
-// another enumeration: we have n items, need to consider all possible splits of
-// n into k parts where there is a limit of sizes for each of the k parts. E.g.
-// n=10, 3 types, bounds=2,4,8 (type 1 has no more than 2, type-2 has no more
-// than 4 and type-3 has no more than 8) we assume sum of these bounds >=n.
-// Otherwise fatal error. Then we can have [1,3,6],[0,2,8] and so on in the case
-// lower bounds are small, we start with the last entry being the highest number
-void InitBoundedPartitionEnum(int numItems,
-                              const vector<int> &lowerBoundsOnParts,
-                              const vector<int> &upperBoundsOnParts,
-                              vector<int> &partSizes) {
-  YW_ASSERT_INFO(upperBoundsOnParts.size() == lowerBoundsOnParts.size(),
-                 "Bound sizes: mismatch");
-  YW_ASSERT_INFO(upperBoundsOnParts.size() >= 1,
-                 "Must have at least one partition");
-  YW_ASSERT_INFO(SumIntVector(upperBoundsOnParts) >= numItems,
-                 "InitBoundedPartitionEnum: upper bounds too small");
-  int sumLBs = SumIntVector(lowerBoundsOnParts);
-  YW_ASSERT_INFO(sumLBs <= numItems,
-                 "InitBoundedPartitionEnum: lower bounds too large");
-  // now start enumerate
-  partSizes = lowerBoundsOnParts;
-  partSizes[partSizes.size() - 1] = numItems - sumLBs;
-  // cout << "InitBoundedPartitionEnum: partSizes = ";
-  // DumpIntVec(partSizes);
-}
-
-bool GetNextBoundedPartitionEnum(int numItems,
-                                 const vector<int> &lowerBoundsOnParts,
-                                 const vector<int> &upperBoundsOnParts,
-                                 vector<int> &partSizes) {
+    parts[parts.size()-1].push_back(i);
+  }
+}
+bool GetNextSubsetPartitionEnum( int numItems, int numParts, vector< vector<int> > &parts ) {
+  // assuming all the elements in @parts is distinct and the number of these elements is @numItems
+   int n =numItems;
+   int p =numParts;
+   if (((int)parts.size()) != p)
+     return false;
+   for (int i=0;i<(int)parts.size();i++) {
+     if (parts[i].empty())
+       return false;
+     sort(parts[i].begin(), parts[i].end());
+   }
+   vector<int> M;
+   vector<int> K;
+   M.reserve(n);
+   K.reserve(n);
+   for (int i =0; i<n;i++) {
+     M.push_back(0);
+     K.push_back(0);
+   }
+   int lastmin =-1;
+   for (int i=0;i<(int)parts.size();++i) {
+     int mmin =n;
+     int key =-1;
+     for (int j=0;j<(int)parts.size();++j) {
+       if (parts[j][0] >lastmin && parts[j][0] <mmin) {
+         key =j;
+         mmin =parts[j][0];
+       }
+     }
+     lastmin =mmin;
+     for (int j=0;j<(int)parts[key].size();++j) {
+        K[parts[key][j]] =i;
+     }
+   }
+   M[0] =K[0];
+   for (int i=1;i<n;i++) {
+     if (K[i] >M[i-1])
+       M[i] =K[i];
+     else
+       M[i] =M[i-1];
+   }
+
+
+   bool success =false;
+   for (int i=n-1;i>=1;--i) {
+     if (K[i]<p-1 && K[i]<=M[i-1]) {
+       success =true;
+       K[i] =K[i]+1;
+       if (K[i]>M[i])
+         M[i] =K[i];
+       for (int j=i+1;j<=n-(p-M[i]);++j) {
+         K[j] =0;
+         M[j] =M[i];
+       }
+       for (int j=n-(p-M[i])+1;j<=n-1;++j) {
+         K[j] =p-(n-j);
+         M[j] =p-(n-j);
+       }
+       break;
+     }
+   }
+   if (!success)
+     return false;
+   parts.clear();
+   for (int i=0;i<p;i++) {
+     parts.push_back(vector<int>());
+   }
+   for (int i=0;i<n;i++) {
+     parts[K[i]].push_back(i);
+   }
+   return true;
+}
+
+// another enumeration: we have n items, need to consider all possible splits of n into k parts where there is a limit of sizes for
+// each of the k parts. E.g. n=10, 3 types, bounds=2,4,8 (type 1 has no more than 2, type-2 has no more than 4 and type-3 has no more than 8)
+// we assume sum of these bounds >=n. Otherwise fatal error. Then we can have [1,3,6],[0,2,8] and so on
+// in the case lower bounds are small, we start with the last entry being the highest number
+void InitBoundedPartitionEnum(int numItems, const vector<int> &lowerBoundsOnParts, const vector<int> &upperBoundsOnParts, vector<int> &partSizes)
+{
+	YW_ASSERT_INFO( upperBoundsOnParts.size() == lowerBoundsOnParts.size(), "Bound sizes: mismatch" );
+	YW_ASSERT_INFO(upperBoundsOnParts.size() >=1, "Must have at least one partition");
+	YW_ASSERT_INFO( SumIntVector(upperBoundsOnParts) >= numItems, "InitBoundedPartitionEnum: upper bounds too small" );
+	int sumLBs = SumIntVector(lowerBoundsOnParts);
+	YW_ASSERT_INFO( sumLBs <= numItems, "InitBoundedPartitionEnum: lower bounds too large" );
+	// now start enumerate
+	partSizes = lowerBoundsOnParts;
+	partSizes[partSizes.size()-1] = numItems - sumLBs;
+//cout << "InitBoundedPartitionEnum: partSizes = ";
+//DumpIntVec(partSizes);
+}
+
+bool GetNextBoundedPartitionEnum(int numItems, const vector<int> &lowerBoundsOnParts, const vector<int> &upperBoundsOnParts, vector<int> &partSizes)
+{
 #if 0
 cout << "numItems = " << numItems << ", LBs = ";
 DumpIntVec( lowerBoundsOnParts );
@@ -1763,424 +1991,463 @@ DumpIntVec( upperBoundsOnParts );
 cout << "Current part sizes = ";
 DumpIntVec( partSizes );
 #endif
-  // in general, try to increase the rightmost (the last part) size unless it is
-  // already at the limit that is, search for the second rightmost part (the
-  // rightmost one is fixed once the other is fixed)  that is not at its upper
-  // bound yet
-  int pos = -1;
-  int sumRight = 0;
-  for (pos = (int)partSizes.size() - 2; pos >= 0; --pos) {
-    //
-    if (partSizes[pos] < upperBoundsOnParts[pos]) {
-      break;
-    }
-    sumRight += partSizes[pos];
-  }
-  // cout << "GetNextBoundedPartitionEnum: pos = " << pos << ", sumRight = " <<
-  // sumRight << endl;
-  // if pos is not found (<0), done
-  if (pos < 0) {
-    return false;
-  }
-  // inc the current pos by 1 and reset the positions to its right to lower
-  // bound
-  partSizes[pos]++;
-  sumRight--;
-  for (int p = pos + 1; p < (int)partSizes.size() - 1; ++p) {
-    partSizes[p] = lowerBoundsOnParts[p];
-    sumRight -= lowerBoundsOnParts[p];
-  }
-  partSizes[(int)partSizes.size() - 1] += sumRight;
-  YW_ASSERT_INFO(partSizes[(int)partSizes.size() - 1] <=
-                         upperBoundsOnParts[(int)partSizes.size() - 1] &&
-                     partSizes[(int)partSizes.size() - 1] >=
-                         lowerBoundsOnParts[(int)partSizes.size() - 1],
-                 "Part sizes: wrong");
-  // cout << "GetNextBoundedPartitionEnum: partSizes = ";
-  // DumpIntVec(partSizes);
-  return true;
-}
-
-void UnionStrings(const set<string> &s1, const set<string> &s2,
-                  set<string> &resSet) {
-  resSet.clear();
-  resSet = s1;
-  for (set<string>::iterator it = s2.begin(); it != s2.end(); ++it) {
-    resSet.insert(*it);
-  }
+	// in general, try to increase the rightmost (the last part) size unless it is already at the limit
+	// that is, search for the second rightmost part (the rightmost one is fixed once the other is fixed)  that is not at its upper bound yet
+	int pos = -1;
+	int sumRight=0;
+	for(pos = (int)partSizes.size()-2; pos >=0; --pos)
+	{
+		//
+		if( partSizes[pos] < upperBoundsOnParts[pos] )
+		{
+			break;
+		}
+		sumRight+= partSizes[pos];
+	}
+//cout << "GetNextBoundedPartitionEnum: pos = " << pos << ", sumRight = " << sumRight << endl;
+	// if pos is not found (<0), done
+	if( pos<0)
+	{
+		return false;
+	}
+	// inc the current pos by 1 and reset the positions to its right to lower bound
+	partSizes[pos]++;
+	sumRight-- ;
+	for( int p=pos+1; p<(int)partSizes.size()-1; ++p )
+	{
+		partSizes[p] = lowerBoundsOnParts[p];
+		sumRight -= lowerBoundsOnParts[p];
+	}
+	partSizes[ (int)partSizes.size()-1 ] += sumRight;
+	YW_ASSERT_INFO( partSizes[(int)partSizes.size()-1] <=upperBoundsOnParts[(int)partSizes.size()-1]
+		&& partSizes[(int)partSizes.size()-1] >=lowerBoundsOnParts[(int)partSizes.size()-1], "Part sizes: wrong" );
+//cout << "GetNextBoundedPartitionEnum: partSizes = ";
+//DumpIntVec(partSizes);
+	return true;
 }
-bool AreStringsSubsetOf(const set<string> &s1Contained,
-                        const set<string> &s2Container) {
-  if (s1Contained.size() > s2Container.size()) {
-    return false;
-  }
-  for (set<string>::iterator it = s1Contained.begin(); it != s1Contained.end();
-       ++it) {
-    if (s2Container.find(*it) == s2Container.end()) {
-      return false;
-    }
-  }
-  return true;
+
+
+void UnionStrings(const set<string> &s1, const set<string> &s2, set<string> &resSet)
+{
+	resSet.clear();
+	resSet = s1;
+	for( set<string> :: iterator it = s2.begin(); it != s2.end(); ++it )
+	{
+		resSet.insert(*it);
+	}
+}
+bool AreStringsSubsetOf(const set<string> &s1Contained, const set<string> &s2Container)
+{
+	if( s1Contained.size() > s2Container.size() )
+	{
+		return false;
+	}
+	for( set<string> :: iterator it = s1Contained.begin(); it != s1Contained.end(); ++it )
+	{
+		if(s2Container.find(*it) == s2Container.end() )
+		{
+			return false;
+		}
+	}
+	return true;
 }
 
-int SumIntVector(const vector<int> &vecInts) {
-  int res = 0;
-  for (int i = 0; i < (int)vecInts.size(); ++i) {
-    res += vecInts[i];
-  }
-  return res;
+int SumIntVector(const vector<int> &vecInts)
+{
+	int res =0;
+	for(int i=0; i<(int)vecInts.size(); ++i)
+	{
+		res += vecInts[i];
+	}
+	return res;
 }
 
-double GetSumOfElements(const vector<double> &listVals) {
-  double res = 0.0;
-  for (int i = 0; i < (int)listVals.size(); ++i) {
-    res += listVals[i];
-  }
-  return res;
-}
-
-void FindAllVectorsKStatesLen(int ks, int lenVec,
-                              vector<vector<int> > &listAllVecs,
-                              bool fOrderByStates) {
-  // find all vectors with certain length and can choose from some states 0 to
-  // ks-1 fOrderByStates: means vectors in states must be ordered in their first
-  // apearnce that is, 2,3,1,2,3 ==> 1,2,3,1,2
-  listAllVecs.clear();
-  // recursively: start with a single length
-  if (lenVec < 1) {
-    // nothing
-    return;
-  }
-  if (lenVec == 1) {
-    // have ks states: 0,1,...ks-1
-    for (int i = 0; i < ks; ++i) {
-      vector<int> vec;
-      vec.push_back(i);
-      listAllVecs.push_back(vec);
-    }
-  } else {
-    // recurisvely perform it
-    vector<vector<int> > listVecsOneLess;
-    FindAllVectorsKStatesLen(ks, lenVec - 1, listVecsOneLess);
-    for (int jj = 0; jj < (int)listVecsOneLess.size(); ++jj) {
-      // for each append one more
-      int nsStart = 0;
-      if (fOrderByStates == true) {
-        // find the largest item so far and start with it
-        for (int kk = 0; kk < (int)listVecsOneLess[jj].size(); ++kk) {
-          if (listVecsOneLess[jj][kk] > nsStart) {
-            nsStart = listVecsOneLess[jj][kk];
-          }
-        }
-      }
-      for (int i = nsStart; i < ks; ++i) {
-        vector<int> vecnew = listVecsOneLess[jj];
-        vecnew.push_back(i);
-        listAllVecs.push_back(vecnew);
-      }
-    }
-  }
+double GetSumOfElements(const vector<double> &listVals)
+{
+	double res =0.0;
+	for(int i=0; i<(int)listVals.size(); ++i)
+	{
+		res += listVals[i];
+	}
+	return res;
+}
+
+
+void FindAllVectorsKStatesLen(int ks, int lenVec, vector< vector<int> >&listAllVecs, bool fOrderByStates)
+{
+	// find all vectors with certain length and can choose from some states 0 to ks-1
+	// fOrderByStates: means vectors in states must be ordered in their first apearnce
+	// that is, 2,3,1,2,3 ==> 1,2,3,1,2
+	listAllVecs.clear();
+	// recursively: start with a single length
+	if( lenVec < 1)
+	{
+		// nothing
+		return;
+	}
+	if( lenVec == 1)
+	{
+		// have ks states: 0,1,...ks-1
+		for(int i=0; i<ks; ++i)
+		{
+			vector<int> vec;
+			vec.push_back(i);
+			listAllVecs.push_back( vec );
+		}
+	}
+	else
+	{
+		// recurisvely perform it
+		vector<vector<int> > listVecsOneLess;
+		FindAllVectorsKStatesLen(ks, lenVec-1, listVecsOneLess);
+		for(int jj=0; jj<(int)listVecsOneLess.size(); ++jj)
+		{
+			// for each append one more
+			int nsStart = 0;
+			if(fOrderByStates == true)
+			{
+				// find the largest item so far and start with it
+				for(int kk=0; kk<(int)listVecsOneLess[jj].size(); ++kk)
+				{
+					if( listVecsOneLess[jj][kk] > nsStart)
+					{
+						nsStart = listVecsOneLess[jj][kk];
+					}
+				}
+			}
+			for(int i=nsStart; i<ks; ++i)
+			{
+				vector<int> vecnew = listVecsOneLess[jj];
+				vecnew.push_back(i);
+				listAllVecs.push_back(vecnew);
+			}
+		}
+	}
+}
+
+void EraseCommonItemsFrom( vector<int> &listItems1, vector<int> &listItems2)
+{
+	// remove shared common items
+	// first sort the list
+	SortIntVec(listItems1);
+	SortIntVec(listItems2);
+//cout << "Before EraseCommonItemsFrom: \n";
+//DumpIntVec(listItems1);
+//DumpIntVec(listItems2);
+	vector<int> listItemNew1, listItemNew2;
+	// iterate through the two list concurrently, and avoid one common item when needed
+	int pos1 = 0, pos2 = 0;
+	while( pos1 <(int)listItems1.size() && pos2 <(int)listItems2.size() )
+	{
+		// if one item is bigger than move it
+		if( listItems1[pos1] < listItems2[pos2] )
+		{
+			// put the item to new list
+			listItemNew1.push_back( listItems1[pos1] );
+			pos1++;
+		}
+		else if( listItems1[pos1] > listItems2[pos2]  )
+		{
+			listItemNew2.push_back( listItems2[pos2] );
+			pos2++;
+		}
+		else
+		{
+			// move together but skip the common items
+			pos1++;
+			pos2++;
+		}
+	}
+	// now add whatever left over to the two list
+	for(int i=pos1; i<(int)listItems1.size(); ++i)
+	{
+		listItemNew1.push_back( listItems1[i] );
+	}
+	for(int i=pos2; i<(int)listItems2.size(); ++i)
+	{
+		listItemNew2.push_back( listItems2[i] );
+	}
+	listItems1 = listItemNew1;
+	listItems2 = listItemNew2;
+//cout << "AFTER EraseCommonItemsFrom: \n";
+//DumpIntVec(listItems1);
+//DumpIntVec(listItems2);
+}
+
+void OffsetIntSetBy( set<int> &ss, int offset)
+{
+	//
+	set<int> sres;
+	for( set<int> :: iterator it = ss.begin(); it != ss.end(); ++it)
+	{
+		sres.insert( (*it) + offset );
+	}
+	ss = sres;
 }
 
-void EraseCommonItemsFrom(vector<int> &listItems1, vector<int> &listItems2) {
-  // remove shared common items
-  // first sort the list
-  SortIntVec(listItems1);
-  SortIntVec(listItems2);
-  // cout << "Before EraseCommonItemsFrom: \n";
-  // DumpIntVec(listItems1);
-  // DumpIntVec(listItems2);
-  vector<int> listItemNew1, listItemNew2;
-  // iterate through the two list concurrently, and avoid one common item when
-  // needed
-  int pos1 = 0, pos2 = 0;
-  while (pos1 < (int)listItems1.size() && pos2 < (int)listItems2.size()) {
-    // if one item is bigger than move it
-    if (listItems1[pos1] < listItems2[pos2]) {
-      // put the item to new list
-      listItemNew1.push_back(listItems1[pos1]);
-      pos1++;
-    } else if (listItems1[pos1] > listItems2[pos2]) {
-      listItemNew2.push_back(listItems2[pos2]);
-      pos2++;
-    } else {
-      // move together but skip the common items
-      pos1++;
-      pos2++;
+
+static int QSortComparePairs( const void *arg1, const void *arg2 )
+{
+   /* Compare all of both strings: */
+    // assume sorting in accending order
+    pair<int, void *>  p1 = *((pair<int, void *>  *) arg1);
+    pair<int, void *>  p2 = *((pair<int, void *>  *) arg2);
+//cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
+    if( p1.first > p2.first)
+    {
+        return 1;
+    }
+    else if( p1.first < p2.first)
+    {
+        return -1;
+    }
+    else
+    {
+        return 0;
     }
-  }
-  // now add whatever left over to the two list
-  for (int i = pos1; i < (int)listItems1.size(); ++i) {
-    listItemNew1.push_back(listItems1[i]);
-  }
-  for (int i = pos2; i < (int)listItems2.size(); ++i) {
-    listItemNew2.push_back(listItems2[i]);
-  }
-  listItems1 = listItemNew1;
-  listItems2 = listItemNew2;
-  // cout << "AFTER EraseCommonItemsFrom: \n";
-  // DumpIntVec(listItems1);
-  // DumpIntVec(listItems2);
-}
-
-void OffsetIntSetBy(set<int> &ss, int offset) {
-  //
-  set<int> sres;
-  for (set<int>::iterator it = ss.begin(); it != ss.end(); ++it) {
-    sres.insert((*it) + offset);
-  }
-  ss = sres;
-}
-
-static int QSortComparePairs(const void *arg1, const void *arg2) {
-  /* Compare all of both strings: */
-  // assume sorting in accending order
-  pair<int, void *> p1 = *((pair<int, void *> *)arg1);
-  pair<int, void *> p2 = *((pair<int, void *> *)arg2);
-  // cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
-  if (p1.first > p2.first) {
-    return 1;
-  } else if (p1.first < p2.first) {
-    return -1;
-  } else {
-    return 0;
-  }
 }
 
-void SortPairsByNums(vector<pair<int, void *> > &listPairs) {
-  //#if 0
-  if (listPairs.size() <= 1) {
-    // do nothing
-    return;
-  }
-  // cout << "Before sort, double vec = ";
-  // DumpDoubleVec( vecVals );
-  int sortLen = (int)listPairs.size();
-
-  int start = 0;
-  int end = sortLen - 1;
-  pair<int, void *> *array = new pair<int, void *>[sortLen];
-  for (int i = start; i <= end; ++i) {
-    array[i - start] = listPairs[i];
-  }
-  qsort((void *)array, sortLen, sizeof(pair<int, void *>), QSortComparePairs);
-  // Now write back
-  for (int i = start; i <= end; ++i) {
-    listPairs[i] = array[i - start];
-  }
 
-  delete[] array;
-  //#endif
-  // cout << "After sort, double vec = ";
-  // DumpDoubleVec( vecVals );
-}
-
-static int QSortComparePairsDouble(const void *arg1, const void *arg2) {
-  /* Compare all of both strings: */
-  // assume sorting in accending order
-  pair<double, void *> p1 = *((pair<double, void *> *)arg1);
-  pair<double, void *> p2 = *((pair<double, void *> *)arg2);
-  // cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
-  if (p1.first > p2.first) {
-    return 1;
-  } else if (p1.first < p2.first) {
-    return -1;
-  } else {
-    return 0;
-  }
+void SortPairsByNums(vector< pair<int, void *> > &listPairs)
+{
+//#if 0
+    if( listPairs.size() <= 1)
+    {
+        // do nothing
+        return;
+    }
+//cout << "Before sort, double vec = ";
+//DumpDoubleVec( vecVals );
+	int sortLen = (int)listPairs.size();
+
+	int start = 0;
+	int end = sortLen -1;
+    pair<int, void *> *array = new pair<int, void *>[sortLen];
+    for(int i=start; i<= end; ++i)
+    {
+        array[i-start] = listPairs[i];
+    }
+    qsort( (void *)array, sortLen, sizeof( pair<int, void *> ), QSortComparePairs );
+    // Now write back
+    for(int i=start; i<=end; ++i)
+    {
+        listPairs[i] = array[i-start];
+    }
+
+    delete [] array;
+//#endif
+//cout << "After sort, double vec = ";
+//DumpDoubleVec( vecVals );
 }
 
-void SortPairsByNumsDouble(vector<pair<double, void *> > &listPairs) {
-  if (listPairs.size() <= 1) {
-    // do nothing
-    return;
-  }
-  // cout << "Before sort, double vec = ";
-  // DumpDoubleVec( vecVals );
-  int sortLen = (int)listPairs.size();
-
-  int start = 0;
-  int end = sortLen - 1;
-  pair<double, void *> *array = new pair<double, void *>[sortLen];
-  for (int i = start; i <= end; ++i) {
-    array[i - start] = listPairs[i];
-  }
-  qsort((void *)array, sortLen, sizeof(pair<double, void *>),
-        QSortComparePairsDouble);
-  // Now write back
-  for (int i = start; i <= end; ++i) {
-    listPairs[i] = array[i - start];
-  }
+static int QSortComparePairsDouble( const void *arg1, const void *arg2 )
+{
+   /* Compare all of both strings: */
+    // assume sorting in accending order
+    pair<double, void *>  p1 = *((pair<double, void *>  *) arg1);
+    pair<double, void *>  p2 = *((pair<double, void *>  *) arg2);
+//cout <<"arg1 = " << n1 << ", arg2 = " << n2 << endl;
+    if( p1.first > p2.first)
+    {
+        return 1;
+    }
+    else if( p1.first < p2.first)
+    {
+        return -1;
+    }
+    else
+    {
+        return 0;
+    }
+}
+
+void SortPairsByNumsDouble(vector< pair<double, void *> > &listPairs)
+{
+    if( listPairs.size() <= 1)
+    {
+        // do nothing
+        return;
+    }
+//cout << "Before sort, double vec = ";
+//DumpDoubleVec( vecVals );
+	int sortLen = (int)listPairs.size();
+
+	int start = 0;
+	int end = sortLen -1;
+    pair<double, void *> *array = new pair<double, void *>[sortLen];
+    for(int i=start; i<= end; ++i)
+    {
+        array[i-start] = listPairs[i];
+    }
+    qsort( (void *)array, sortLen, sizeof( pair<double, void *> ), QSortComparePairsDouble );
+    // Now write back
+    for(int i=start; i<=end; ++i)
+    {
+        listPairs[i] = array[i-start];
+    }
 
-  delete[] array;
+    delete [] array;
 }
 
 //**************************************************************************************************************
 // Ruhua Jiang's code for enumeration
 
-static void convert_index_to_vector_helper(bool store_enum, int query_index,
-                                           int color_num, int box_num,
-                                           int &count, vector_t &vec,
-                                           vector_t &result,
-                                           vector_vector_t &enumeration) {
-  if (result.size() != 0 && !store_enum)
-    return;
-  // Base case
-  if (color_num == 1) {
-    vec.push_back(box_num);
-    count++;
-    if (store_enum)
-      enumeration.push_back(vec);
-
-    if (count - 1 == query_index) {
-      // std::cout<<count-1<<"\t";
-      for (int k = 0; k < (int)vec.size(); k++) {
-        // std::cout<<vec[k]<<" ";
-        result.push_back(vec[k]);
-      }
-    }
-    vec.pop_back();
-    // std::cout<<endl;
-    return;
-  }
-  // Recursion
-  for (int i = box_num; i >= 0; i--) {
-    vec.push_back(i);
-    convert_index_to_vector_helper(store_enum, query_index, color_num - 1,
-                                   box_num - i, count, vec, result,
-                                   enumeration);
-    vec.pop_back();
-  }
+static void convert_index_to_vector_helper(bool store_enum,int query_index,int color_num, int box_num, int & count, vector_t & vec, vector_t &result, vector_vector_t & enumeration)
+{
+	if(result.size() !=0  && !store_enum) return;
+	//Base case
+	if(color_num == 1){
+        vec.push_back(box_num);
+        count++;
+        if (store_enum) enumeration.push_back(vec);
+
+        if(count -1 == query_index)
+        {
+            //std::cout<<count-1<<"\t";
+            for(int k = 0; k < (int)vec.size();k++){
+                //std::cout<<vec[k]<<" ";
+                result.push_back(vec[k]);
+            }
+        }
+        vec.pop_back();
+        //std::cout<<endl;
+        return;
+	}
+	//Recursion
+	for(int i = box_num; i >= 0;i--){
+		vec.push_back(i);
+		convert_index_to_vector_helper(store_enum,query_index,color_num-1, box_num - i,count,vec, result,enumeration);
+		vec.pop_back();
+	}
 }
 
-static void convert_vector_to_int_helper(bool store_enum, vector_t query_vec,
-                                         int color_num, int box_num, int &count,
-                                         vector_t &vec, bool &find,
-                                         vector_vector_t &enumeration) {
-  if (find && !store_enum)
-    return;
-  // Base case
-  if (color_num == 1) {
-    vec.push_back(box_num);
-    count++;
-    if (store_enum)
-      enumeration.push_back(vec);
-    if (vec == query_vec) {
-      find = true;
-    }
-    vec.pop_back();
-    // std::cout<<endl;
-    return;
-  }
-  // Recursion
-  for (int i = box_num; i >= 0; i--) {
-    vec.push_back(i);
-    convert_vector_to_int_helper(store_enum, query_vec, color_num - 1,
-                                 box_num - i, count, vec, find, enumeration);
-    vec.pop_back();
-  }
+static void convert_vector_to_int_helper(bool store_enum,vector_t query_vec,int color_num, int box_num, int & count, vector_t & vec, bool &find,vector_vector_t & enumeration)
+{
+	if(find && !store_enum) return;
+	//Base case
+	if(color_num == 1){
+        vec.push_back(box_num);
+        count++;
+        if (store_enum) enumeration.push_back(vec);
+        if(vec == query_vec)
+        {
+            find = true;
+        }
+        vec.pop_back();
+        //std::cout<<endl;
+        return;
+	}
+	//Recursion
+	for(int i = box_num; i >= 0;i--){
+		vec.push_back(i);
+		convert_vector_to_int_helper(store_enum,query_vec,color_num-1, box_num - i,count,vec,find,enumeration);
+		vec.pop_back();
+	}
 }
 
-// Returns whether enumeration is stored or not. If index is not find,
-// result.size() still 0
-bool convert_index_to_vector(bool enum_already_set, int color_num, int box_num,
-                             int index, vector_t &result,
-                             vector_vector_t &enumeration) {
-  int count = 0;
-  vector_t vec;
-  // if enumeration is stored or not, then directly access
-  if (enum_already_set) {
-    if (index < (int)enumeration.size()) {
-      for (int k = 0; k < (int)enumeration[index].size(); k++) {
-        result.push_back(enumeration[index][k]);
-      }
-      // std::cout<<"direct access!";  //uncomments this line if want test
-      // whether direct access success or not
-    }
-    return true;
-  } else {
 
-    if (color_num > BOX_NUM_THRESHOLD ||
-        box_num > COLOR_NUM_THRESHOLD) // c and n too large, we do not store
-                                       // enumeration
-    {
-      convert_index_to_vector_helper(false, index, color_num, box_num, count,
-                                     vec, result, enumeration);
-      return false;
-    } else {
-      convert_index_to_vector_helper(true, index, color_num, box_num, count,
-                                     vec, result, enumeration);
-      return true;
-    }
-  }
+//Returns whether enumeration is stored or not. If index is not find, result.size() still 0
+bool convert_index_to_vector(bool enum_already_set,int color_num, int box_num, int index, vector_t &result,vector_vector_t & enumeration)
+{
+	int count=0;
+	vector_t vec;
+	//if enumeration is stored or not, then directly access
+	if(enum_already_set) {
+		if(index < (int)enumeration.size())
+		{
+			for(int k = 0; k < (int)enumeration[index].size();k++){
+                result.push_back(enumeration[index][k]);
+			}
+			//std::cout<<"direct access!";  //uncomments this line if want test whether direct access success or not
+		}
+		return true;
+	}
+	else{
+
+		if(color_num > BOX_NUM_THRESHOLD || box_num >COLOR_NUM_THRESHOLD) // c and n too large, we do not store enumeration
+		{
+			convert_index_to_vector_helper(false,index,color_num,box_num,count,vec,result,enumeration);
+			return false;
+		}
+		else
+		{
+			convert_index_to_vector_helper(true,index,color_num,box_num,count,vec,result,enumeration);
+			return true;
+		}
+	}
+
 }
 
-// Returns whether enumeration is stored or not. If query_vec is not find,
-// result_index is set to -1
-bool convert_vector_to_index(bool enum_already_set, vector_t query_vec,
-                             int &result_index, vector_vector_t &enumeration) {
-  int color_num = query_vec.size(), box_num = 0, index = 0;
-  for (int i = 0; i < (int)query_vec.size(); i++)
-    box_num += query_vec[i];
-  vector_t vec;
-  bool find = false;
+//Returns whether enumeration is stored or not. If query_vec is not find, result_index is set to -1
+bool convert_vector_to_index(bool enum_already_set,vector_t query_vec,int &result_index,vector_vector_t & enumeration)
+{
+	int color_num = query_vec.size(), box_num=0, index =0;
+	for(int i=0; i< (int)query_vec.size();i++)box_num+=query_vec[i];
+	vector_t vec;
+	bool find = false;
 
-  // if enumeration is stored or not, then directly compare
-  if (enum_already_set) {
-    for (int i = 0; i < (int)enumeration.size(); i++) {
-      if (query_vec == enumeration[i]) {
-        result_index = i;
-        // std::cout<<"direct access!";  //uncomments this line if want test
-        // whether direct access success or not
-        return enum_already_set;
-      }
-    }
+	//if enumeration is stored or not, then directly compare
+	if(enum_already_set) {
+		for(int i=0; i< (int)enumeration.size(); i++){
+			if(query_vec == enumeration[i]){
+				result_index = i;
+				//std::cout<<"direct access!";  //uncomments this line if want test whether direct access success or not
+				return  enum_already_set;
+			}
+		}
 
-    result_index = -1;
+		result_index = -1;
+
+        // is this correct???
+        return false;
+	}
+	else
+	{
+		if(color_num > BOX_NUM_THRESHOLD || box_num >COLOR_NUM_THRESHOLD)
+		{
+			convert_vector_to_int_helper(false,query_vec,color_num, box_num, index, vec, find,enumeration);
+			result_index = index -1;
+			return false;
+		}
+		else{
+			convert_vector_to_int_helper(true,query_vec,color_num, box_num, index, vec, find,enumeration);
+			result_index = index -1;
+			return false;
+		}
+	}
 
-    // is this correct???
-    return false;
-  } else {
-    if (color_num > BOX_NUM_THRESHOLD || box_num > COLOR_NUM_THRESHOLD) {
-      convert_vector_to_int_helper(false, query_vec, color_num, box_num, index,
-                                   vec, find, enumeration);
-      result_index = index - 1;
-      return false;
-    } else {
-      convert_vector_to_int_helper(true, query_vec, color_num, box_num, index,
-                                   vec, find, enumeration);
-      result_index = index - 1;
-      return false;
-    }
-  }
-}
 
-void ZeroOutVec(vector<int> &vec) {
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    vec[i] = 0;
-  }
 }
 
-void GetFourPartsIncompatSplits(const set<int> &setAll, const set<int> &split1,
-                                const set<int> &split2, set<int> &part1,
-                                set<int> &part2, set<int> &part3,
-                                set<int> &part4) {
-  //
-  set<int> split1b = setAll;
-  SubtractSets(split1b, split1);
-  set<int> split2b = setAll;
-  SubtractSets(split2b, split2);
-  JoinSets(split1, split2, part1);
-  JoinSets(split1, split2b, part2);
-  JoinSets(split1b, split2, part3);
-  JoinSets(split1b, split2b, part4);
+void ZeroOutVec(vector<int> &vec)
+{
+    for(int i=0;i<(int)vec.size(); ++i)
+    {
+        vec[i] = 0;
+    }
 }
 
-bool IsAllZeroVec(const vector<int> &vec) {
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    if (vec[i] != 0) {
-      return false;
+void GetFourPartsIncompatSplits( const set<int> &setAll, const set<int> &split1, const set<int> &split2, set<int> &part1, set<int> &part2, set<int> &part3, set<int> &part4 )
+{
+    //
+    set<int> split1b = setAll;
+    SubtractSets(split1b, split1);
+    set<int> split2b = setAll;
+    SubtractSets(split2b, split2);
+    JoinSets( split1, split2, part1 );
+    JoinSets( split1, split2b, part2);
+    JoinSets( split1b, split2, part3 );
+    JoinSets( split1b, split2b, part4);
+}
+
+bool IsAllZeroVec(const vector<int> &vec)
+{
+    for(int i=0;i<(int)vec.size();++i)
+    {
+        if(vec[i] != 0 )
+        {
+            return false;
+        }
     }
-  }
-  return true;
+    return true;
 }
diff --git a/trisicell/external/scistree/Utils3.h b/trisicell/external/scistree/Utils3.h
index 247bdd5..f59b546 100644
--- a/trisicell/external/scistree/Utils3.h
+++ b/trisicell/external/scistree/Utils3.h
@@ -1,15 +1,15 @@
 #ifndef UTILS3_H
 #define UTILS3_H
 
-#include <cstdio>
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
 #include <list>
-#include <map>
+#include <vector>
 #include <set>
+#include <map>
 #include <string>
-#include <vector>
+#include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
 
 using namespace std;
 
@@ -25,55 +25,57 @@ using namespace std;
 // ***************************************************************************
 
 // Abstract class for item stored in my hash table
-class YWHashItem {
+class YWHashItem
+{
 public:
-  virtual ~YWHashItem() = 0;
-  virtual int Key() = 0;
-  virtual bool operator==(const YWHashItem &rhs) = 0;
+    virtual ~YWHashItem() = 0;
+    virtual int Key() = 0;
+    virtual bool operator==(const YWHashItem &rhs) = 0;
 };
 
 // This is the hash table that mgiht be useful in some applications
 // Note that this is a rather static HASH table: you can only add stuff in
 // but can not remove. TBD
-class YWHashTable {
+class YWHashTable
+{
 public:
-  YWHashTable(int numBuckets = 100);
-  ~YWHashTable(); // NOTE: has to free memory here
-  void AddItem(YWHashItem *pItem);
-  YWHashItem *GetIdenticalItem(YWHashItem *pItem);
-  YWHashItem *GetFirstItem();
-  YWHashItem *GetNextItem();
-  int GetTotalItemNum() const;
-  void Dump() const;
+    YWHashTable(int numBuckets = 100);
+    ~YWHashTable(); // NOTE: has to free memory here
+    void AddItem(YWHashItem *pItem);
+    YWHashItem *GetIdenticalItem(YWHashItem *pItem);
+    YWHashItem *GetFirstItem();
+    YWHashItem *GetNextItem();
+    int GetTotalItemNum() const;
+    void Dump() const;
 
 private:
-  int numBuckets;
-  //    vector< vector<YWHashItem *> > hashTable;
+    int numBuckets;
+    //    vector< vector<YWHashItem *> > hashTable;
 
-  // Sorry we have not implemented hashing yet
-  vector<YWHashItem *> hashTable;
+    // Sorry we have not implemented hashing yet
+    vector<YWHashItem *> hashTable;
 
-  // TBD. These are for enumeation only. BUT only support single enumeration
-  // PLEASE do not use in a double loop
-  int curPos;
+    // TBD. These are for enumeation only. BUT only support single enumeration
+    // PLEASE do not use in a double loop
+    int curPos;
 };
 
 // used to support STL
-class SequenceCmp //: public binary_function<SequenceCmp &, const SEQUENCE &,
-                  //const SEQUENCE>
+class SequenceCmp //: public binary_function<SequenceCmp &, const SEQUENCE &, const SEQUENCE>
 {
 public:
-  bool operator()(const SEQUENCE &seq1, const SEQUENCE &seq2) const;
+    bool operator()(const SEQUENCE &seq1, const SEQUENCE &seq2) const;
 };
 
 // iterator pattern
-class GenericIterator {
+class GenericIterator
+{
 public:
-  virtual ~GenericIterator() {}
-  virtual void First() = 0;
-  virtual void Next() = 0;
-  virtual bool IsDone() = 0;
-  // virtual void *GetCurItem() = 0;
+    virtual ~GenericIterator() {}
+    virtual void First() = 0;
+    virtual void Next() = 0;
+    virtual bool IsDone() = 0;
+    //virtual void *GetCurItem() = 0;
 };
 
 // ***************************************************************************
@@ -84,13 +86,10 @@ int GetIntervalLen(const INTERVAL &iv);
 int GetRandItemInSet(const set<int> &items);
 int GetRandItemInVec(const vector<int> &items);
 void GetRandVector(vector<int> &rndVec, int start, int end);
-int GetWeightedRandItemInVec(const vector<int> &items,
-                             const vector<double> &itemWeights);
+int GetWeightedRandItemInVec(const vector<int> &items, const vector<double> &itemWeights);
 int GetWeightedRandItemIndex(const vector<double> &itemWeights);
-// this function converts the subset over a vector to the subet over the
-// original space
-void GetOrigSubset(const vector<int> &origVec, const set<int> &subsetInd,
-                   set<int> &subsetOrig);
+// this function converts the subset over a vector to the subet over the original space
+void GetOrigSubset(const vector<int> &origVec, const set<int> &subsetInd, set<int> &subsetOrig);
 void MutateSequenceAtSites(SEQUENCE &mutSeq, vector<int> &mutSites);
 void DumpDoubleVec(const vector<double> &vecDoubles);
 void DumpDoubleVec(const vector<long double> &vecDoubles);
@@ -106,88 +105,56 @@ void SortDoubleVec(vector<double> &vecVals, int start = 0, int end = -1);
 void SortDoubleVec(vector<long double> &vecVals, int start = 0, int end = -1);
 void FindUniformColumns(const vector<SEQUENCE> &listSeqs, set<int> &uniSites);
 int FindNoninformativeRow(const vector<SEQUENCE> &listSeqs, int col);
-void BreakSeqAtBkpt(const SEQUENCE &seq, int bkpt, SEQUENCE &seqLeft,
-                    SEQUENCE &seqRight);
+void BreakSeqAtBkpt(const SEQUENCE &seq, int bkpt, SEQUENCE &seqLeft, SEQUENCE &seqRight);
 bool AreTwoSeqsBroken(const SEQUENCE &seqLeft, const SEQUENCE &seqRight);
 
 // support parition-enumeration
 // Suppose we have g groups of (indistingishable) items and we want to
 // divide each group into numParts colors (distinguishable)
-// this support enumerate these choices. For example, we have two segments of 3
-// and 4 items each and we have two colors, then the choices will be: [(1,2),
-// (2,2)], or [(0.3),(1,3)]
-void InitPartitionEnum(const vector<int> &vecSegSizes, int numParts,
-                       vector<vector<int> > &parts);
-bool GetNextPartitionEnum(const vector<int> &vecSegSizes, int numParts,
-                          vector<vector<int> > &parts);
-int GetPartEnumIndex(const vector<int> &vecSegSizes, int numParts,
-                     const vector<vector<int> > &parts);
-void ConvIndexToPartEnum(const vector<int> &vecSegSizes, int numParts,
-                         int pIndex, vector<vector<int> > &parts);
-void ConvIndexToPartEnumVar(const vector<int> &vecSegSizes,
-                            const vector<int> &numParts, int pIndex,
-                            vector<vector<int> > &parts);
-void InitPartitionEnumVar(const vector<int> &vecSegSizes,
-                          const vector<int> &numParts,
-                          vector<vector<int> > &parts);
-bool GetNextPartitionEnumVar(const vector<int> &vecSegSizes,
-                             const vector<int> &numParts,
-                             vector<vector<int> > &parts);
-int GetPartEnumIndexVar(const vector<int> &vecSegSizes,
-                        const vector<int> &numParts,
-                        const vector<vector<int> > &parts);
-void MoveOneItemInPartEnum(const vector<vector<int> > &partsSrc, int part,
-                           int psrc, int pdest,
-                           vector<vector<int> > &partsDest);
+// this support enumerate these choices. For example, we have two segments of 3 and 4 items each
+// and we have two colors, then the choices will be: [(1,2), (2,2)], or [(0.3),(1,3)]
+void InitPartitionEnum(const vector<int> &vecSegSizes, int numParts, vector<vector<int>> &parts);
+bool GetNextPartitionEnum(const vector<int> &vecSegSizes, int numParts, vector<vector<int>> &parts);
+int GetPartEnumIndex(const vector<int> &vecSegSizes, int numParts, const vector<vector<int>> &parts);
+void ConvIndexToPartEnum(const vector<int> &vecSegSizes, int numParts, int pIndex, vector<vector<int>> &parts);
+void ConvIndexToPartEnumVar(const vector<int> &vecSegSizes, const vector<int> &numParts, int pIndex, vector<vector<int>> &parts);
+void InitPartitionEnumVar(const vector<int> &vecSegSizes, const vector<int> &numParts, vector<vector<int>> &parts);
+bool GetNextPartitionEnumVar(const vector<int> &vecSegSizes, const vector<int> &numParts, vector<vector<int>> &parts);
+int GetPartEnumIndexVar(const vector<int> &vecSegSizes, const vector<int> &numParts, const vector<vector<int>> &parts);
+void MoveOneItemInPartEnum(const vector<vector<int>> &partsSrc, int part, int psrc, int pdest, vector<vector<int>> &partsDest);
 int GetPartitionEnumNum(int n, int p);
 int GetPartitionEnumId(int numItemsTot, const vector<int> &vec);
-void GetPartitionEnumPartForId(int numItemsTot, int numParts, int eid,
-                               vector<int> &vec);
+void GetPartitionEnumPartForId(int numItemsTot, int numParts, int eid, vector<int> &vec);
 
 // support another version of partiton-enumeration
-// Suppose we have n (distinct) items, and we want to partition into k groups
-// (each with at least one item) E.g. we have {a,b,c,d} and we want to partition
-// into 3 groups. Then choices are: {a,b,cd}, {ab,c,d}, {ac,b,d}, and so on
-void InitSubsetPartitionEnum(int numItems, int numParts,
-                             vector<vector<int> > &parts);
-bool GetNextSubsetPartitionEnum(int numItems, int numParts,
-                                vector<vector<int> > &parts);
+// Suppose we have n (distinct) items, and we want to partition into k groups (each with at least one item)
+// E.g. we have {a,b,c,d} and we want to partition into 3 groups. Then choices are:
+// {a,b,cd}, {ab,c,d}, {ac,b,d}, and so on
+void InitSubsetPartitionEnum(int numItems, int numParts, vector<vector<int>> &parts);
+bool GetNextSubsetPartitionEnum(int numItems, int numParts, vector<vector<int>> &parts);
 
-// another enumeration: we have n items, need to consider all possible splits of
-// n into k parts where there is a limit of sizes for each of the k parts. E.g.
-// n=10, 3 types, bounds=2,4,8 (type 1 has no more than 2, type-2 has no more
-// than 4 and type-3 has no more than 8) we assume sum of these bounds >=n.
-// Otherwise fatal error. Then we can have [1,3,6],[0,2,8] and so on
-void InitBoundedPartitionEnum(int numItems,
-                              const vector<int> &lowerBoundsOnParts,
-                              const vector<int> &upperBoundsOnParts,
-                              vector<int> &partSizes);
-bool GetNextBoundedPartitionEnum(int numItems,
-                                 const vector<int> &lowerBoundsOnParts,
-                                 const vector<int> &upperBoundsOnParts,
-                                 vector<int> &partSizes);
+// another enumeration: we have n items, need to consider all possible splits of n into k parts where there is a limit of sizes for
+// each of the k parts. E.g. n=10, 3 types, bounds=2,4,8 (type 1 has no more than 2, type-2 has no more than 4 and type-3 has no more than 8)
+// we assume sum of these bounds >=n. Otherwise fatal error. Then we can have [1,3,6],[0,2,8] and so on
+void InitBoundedPartitionEnum(int numItems, const vector<int> &lowerBoundsOnParts, const vector<int> &upperBoundsOnParts, vector<int> &partSizes);
+bool GetNextBoundedPartitionEnum(int numItems, const vector<int> &lowerBoundsOnParts, const vector<int> &upperBoundsOnParts, vector<int> &partSizes);
 
 // new things from treeHMM
-bool GetFirstMutliChoice(int numStage, int numStageElem,
-                         vector<int> &initChoice);
-bool GetNextMutliChoice(int numStage, int numStageElem,
-                        vector<int> &initChoice);
-// void DumpVecSequences( const vector<SEQUENCE> &setSeqs );
-void GetVecSequencesIV(const vector<SEQUENCE> &vecSeqs, int left, int right,
-                       vector<SEQUENCE> &vecSeqsIV);
+bool GetFirstMutliChoice(int numStage, int numStageElem, vector<int> &initChoice);
+bool GetNextMutliChoice(int numStage, int numStageElem, vector<int> &initChoice);
+//void DumpVecSequences( const vector<SEQUENCE> &setSeqs );
+void GetVecSequencesIV(const vector<SEQUENCE> &vecSeqs, int left, int right, vector<SEQUENCE> &vecSeqsIV);
 int GetNumZerosInSeq(const SEQUENCE &seq);
 void GetSeqSplit(const SEQUENCE &seq, set<int> &zeroBits, set<int> &oneBits);
-void SortVecIntPairs(vector<pair<int, int> > &listOfPriority);
+void SortVecIntPairs(vector<pair<int, int>> &listOfPriority);
 void ConvVecToArray(const vector<int> &vec, int *arr);
 void ConvVecToArray(const vector<double> &vec, double *arr);
 void DumpIntArray(int len, int *arr);
 void FlipBinVector(vector<int> &vec);
-void ConvOneSideToFullSplit(vector<int> &split, const set<int> &oneside,
-                            int numLeaves, int val = 1);
+void ConvOneSideToFullSplit(vector<int> &split, const set<int> &oneside, int numLeaves, int val = 1);
 
 // more on missing value
-bool AreTwoMVVecCompat(const vector<int> &vec1, const vector<int> &vec2,
-                       int &numTrueMatch);
+bool AreTwoMVVecCompat(const vector<int> &vec1, const vector<int> &vec2, int &numTrueMatch);
 int GetMVNum(const vector<int> &vec);
 bool AreSeqsOverlap(const vector<int> &vec1, const vector<int> &vec2);
 
@@ -201,17 +168,12 @@ int GetSubstringLeftPos(const INTERVAL_SUBSTRING &substr);
 int GetSubstringRightPos(const INTERVAL_SUBSTRING &substr);
 void GetIVSubstringData(const INTERVAL_SUBSTRING &substr, SEQUENCE &seq);
 INTERVAL GetSubstringInterval(const INTERVAL_SUBSTRING &substr);
-bool GetSubstringSegment(const INTERVAL_SUBSTRING &substr,
-                         const INTERVAL &ivToRead, SEQUENCE &segment);
+bool GetSubstringSegment(const INTERVAL_SUBSTRING &substr, const INTERVAL &ivToRead, SEQUENCE &segment);
 int GetSubstringValAt(const INTERVAL_SUBSTRING &substr, int pos);
-bool IsSegmentContained(const INTERVAL_SUBSTRING &seqContained,
-                        const INTERVAL_SUBSTRING &seqContainer);
-bool AreSegmentsConsistent(const INTERVAL_SUBSTRING &seqContained,
-                           const INTERVAL_SUBSTRING &seqContainer);
-int GetSegmentsIntersection(const INTERVAL_SUBSTRING &seq1,
-                            const INTERVAL_SUBSTRING &seq2, INTERVAL &iv);
-bool AreSegmentsNextto(const INTERVAL_SUBSTRING &seq1,
-                       const INTERVAL_SUBSTRING &seq2);
+bool IsSegmentContained(const INTERVAL_SUBSTRING &seqContained, const INTERVAL_SUBSTRING &seqContainer);
+bool AreSegmentsConsistent(const INTERVAL_SUBSTRING &seqContained, const INTERVAL_SUBSTRING &seqContainer);
+int GetSegmentsIntersection(const INTERVAL_SUBSTRING &seq1, const INTERVAL_SUBSTRING &seq2, INTERVAL &iv);
+bool AreSegmentsNextto(const INTERVAL_SUBSTRING &seq1, const INTERVAL_SUBSTRING &seq2);
 void DumpSubstring(const INTERVAL_SUBSTRING &substr);
 
 // ***************************************************************************
@@ -221,113 +183,103 @@ double GetLogSumOfLogs(const vector<double> &listLogs);
 double GetLogSumOfLogsDirect(const vector<double> &listLogs);
 double GetLogSumOfTwo(double logv1, double logv2);
 double GetSumOfElements(const vector<double> &listVals);
-void SumofLogVecs(vector<double> &listLogsAdded,
-                  vector<double> &listLogsAdding);
+void SumofLogVecs(vector<double> &listLogsAdded, vector<double> &listLogsAdding);
 
 // ***************************************************************************
 // Other utilities
 // ***************************************************************************
-int FindMatchedSeqForFounders(const vector<SEQUENCE> &founder,
-                              const SEQUENCE &seq, set<int> &endRows,
-                              bool fPrefix);
-void RecoverOrigIndicesAfterDeletion(const vector<int> &removedItems,
-                                     const vector<int> &itemsNew,
+int FindMatchedSeqForFounders(const vector<SEQUENCE> &founder, const SEQUENCE &seq,
+                              set<int> &endRows, bool fPrefix);
+void RecoverOrigIndicesAfterDeletion(const vector<int> &removedItems, const vector<int> &itemsNew,
                                      vector<int> &itemsOrigIndices);
-void GetOrigPositionAfterRemoval(int numRemains,
-                                 const vector<int> &itemsRemoved,
-                                 vector<int> &origPosForRemains);
+void GetOrigPositionAfterRemoval(int numRemains, const vector<int> &itemsRemoved, vector<int> &origPosForRemains);
 void InsertOrderedVec(vector<int> &vec, int val);
 template <typename T>
-int binary_search(const std::vector<T> &vec, unsigned start, unsigned end,
-                  const T &key);
+int binary_search(const std::vector<T> &vec, unsigned start, unsigned end, const T &key);
 bool ReadIntListFromFile(const char *fname, vector<int> &listInts);
-void GetVecPosNotInSet(const vector<int> &vec, const set<int> &s,
-                       vector<int> &posDiff);
+void GetVecPosNotInSet(const vector<int> &vec, const set<int> &s, vector<int> &posDiff);
 void AddIntVec(vector<int> &vecDest, const vector<int> &vecSrc);
 void SubtractIntVec(vector<int> &vecDest, const vector<int> &vecSubtracted);
 void GetItemsInRange(const set<int> &items, int lb, int ub, set<int> &sset);
 void InitRandom(int seed);
 void PermuatePseudoRandomVec(vector<int> &vecPerm);
 void UnionMultiset(multiset<int> &setUpdate, const multiset<int> &setAdded);
-void JoinMultiset(const multiset<int> &set1, const multiset<int> &set2,
-                  multiset<int> &setInt);
+void JoinMultiset(const multiset<int> &set1, const multiset<int> &set2, multiset<int> &setInt);
 void ConvMSetToSet(const multiset<int> &mset, set<int> &ss);
 void DumpMultiset(const multiset<int> &mset);
-int CalcNumNChooseK(int n,
-                    int k); // how many ways to choose k items from n items
-void UnionStrings(const set<string> &s1, const set<string> &s2,
-                  set<string> &resSet);
-bool AreStringsSubsetOf(const set<string> &s1Contained,
-                        const set<string> &s2Container);
+int CalcNumNChooseK(int n, int k); // how many ways to choose k items from n items
+void UnionStrings(const set<string> &s1, const set<string> &s2, set<string> &resSet);
+bool AreStringsSubsetOf(const set<string> &s1Contained, const set<string> &s2Container);
 int SumIntVector(const vector<int> &vecInts);
-void FindAllVectorsKStatesLen(int ks, int ns, vector<vector<int> > &listAllVecs,
-                              bool fOrderByStates = false);
+void FindAllVectorsKStatesLen(int ks, int ns, vector<vector<int>> &listAllVecs, bool fOrderByStates = false);
 void EraseCommonItemsFrom(vector<int> &listItems1, vector<int> &listItems2);
 void OffsetIntSetBy(set<int> &ss, int offset);
-void SortPairsByNums(vector<pair<int, void *> > &listPairs);
-void SortPairsByNumsDouble(vector<pair<double, void *> > &listPairs);
+void SortPairsByNums(vector<pair<int, void *>> &listPairs);
+void SortPairsByNumsDouble(vector<pair<double, void *>> &listPairs);
 void ZeroOutVec(vector<int> &vec);
-void GetFourPartsIncompatSplits(const set<int> &setAll, const set<int> &split1,
-                                const set<int> &split2, set<int> &part1,
-                                set<int> &part2, set<int> &part3,
-                                set<int> &part4);
+void GetFourPartsIncompatSplits(const set<int> &setAll, const set<int> &split1, const set<int> &split2, set<int> &part1, set<int> &part2, set<int> &part3, set<int> &part4);
 bool IsAllZeroVec(const vector<int> &vec);
 
 // ***************************************************************************
 // template utilties
 // ***************************************************************************
 template <class TYPE>
-void JoinSetsGen(const set<TYPE> &set1, const set<TYPE> &set2,
-                 set<TYPE> &sint) {
-  //
-  sint.clear();
-  for (typename set<TYPE>::iterator it = set1.begin(); it != set1.end(); ++it) {
+void JoinSetsGen(const set<TYPE> &set1, const set<TYPE> &set2, set<TYPE> &sint)
+{
     //
-    if (set2.find(*it) != set2.end()) {
-      //
-      sint.insert(*it);
+    sint.clear();
+    for (typename set<TYPE>::iterator it = set1.begin(); it != set1.end(); ++it)
+    {
+        //
+        if (set2.find(*it) != set2.end())
+        {
+            //
+            sint.insert(*it);
+        }
     }
-  }
 }
 
 template <class TYPE>
-void UnionSetsGen(set<TYPE> &setAdded, const set<TYPE> &setAddin) {
-  //
-  for (typename set<TYPE>::iterator it = setAddin.begin(); it != setAddin.end();
-       ++it) {
+void UnionSetsGen(set<TYPE> &setAdded, const set<TYPE> &setAddin)
+{
     //
-    setAdded.insert(*it);
-  }
+    for (typename set<TYPE>::iterator it = setAddin.begin(); it != setAddin.end(); ++it)
+    {
+        //
+        setAdded.insert(*it);
+    }
 }
 
 template <class TYPE>
-void SubtractSetsGen(set<TYPE> &setMain, const set<TYPE> &setSubtracted) {
-  //
-  for (typename set<TYPE>::iterator it = setSubtracted.begin();
-       it != setSubtracted.end(); ++it) {
+void SubtractSetsGen(set<TYPE> &setMain, const set<TYPE> &setSubtracted)
+{
     //
-    setMain.erase(*it);
-  }
+    for (typename set<TYPE>::iterator it = setSubtracted.begin(); it != setSubtracted.end(); ++it)
+    {
+        //
+        setMain.erase(*it);
+    }
 }
 
 template <class TYPE>
-bool AreItemsSimilar(const vector<TYPE> &listItems, const TYPE &tol) {
-  // are the number of items within some toleratnce from the average
-  TYPE sum = 0;
-  for (typename vector<TYPE>::const_iterator it = listItems.begin();
-       it != listItems.end(); ++it) {
-    //
-    sum += *it;
-  }
-  for (typename vector<TYPE>::const_iterator it = listItems.begin();
-       it != listItems.end(); ++it) {
-    //
-    if ((*it) - sum / listItems.size() > tol * sum / listItems.size() ||
-        sum / listItems.size() - (*it) > tol * sum / listItems.size()) {
-      return false;
+bool AreItemsSimilar(const vector<TYPE> &listItems, const TYPE &tol)
+{
+    // are the number of items within some toleratnce from the average
+    TYPE sum = 0;
+    for (typename vector<TYPE>::const_iterator it = listItems.begin(); it != listItems.end(); ++it)
+    {
+        //
+        sum += *it;
+    }
+    for (typename vector<TYPE>::const_iterator it = listItems.begin(); it != listItems.end(); ++it)
+    {
+        //
+        if ((*it) - sum / listItems.size() > tol * sum / listItems.size() || sum / listItems.size() - (*it) > tol * sum / listItems.size())
+        {
+            return false;
+        }
     }
-  }
-  return true;
+    return true;
 }
 
 //**************************************************************************************************************
@@ -345,12 +297,9 @@ typedef Enumeration Enumeration;
 const int COLOR_NUM_THRESHOLD = 40;
 const int BOX_NUM_THRESHOLD = 5;
 
-// Notice that index is zero based number
+//Notice that index is zero based number
 
-bool convert_index_to_vector(bool enum_already_set, int color_num, int box_num,
-                             int index, vector_t &result,
-                             vector_vector_t &enumeration);
-bool convert_vector_to_index(bool enum_already_set, vector_t query_vec,
-                             int &result_index, vector_vector_t &enumeration);
+bool convert_index_to_vector(bool enum_already_set, int color_num, int box_num, int index, vector_t &result, vector_vector_t &enumeration);
+bool convert_vector_to_index(bool enum_already_set, vector_t query_vec, int &result_index, vector_vector_t &enumeration);
 
-#endif // UTILS3_H
+#endif //UTILS3_H
diff --git a/trisicell/external/scistree/Utils4.cpp b/trisicell/external/scistree/Utils4.cpp
index 01c1003..428846d 100644
--- a/trisicell/external/scistree/Utils4.cpp
+++ b/trisicell/external/scistree/Utils4.cpp
@@ -76,737 +76,811 @@ void CreateTwoVecFromMap(const map<TYPE1,TYPE2> &mapIn, vector<TYPE1> &vecKey, v
 
 #endif
 
-int GetZeroOneDiff(int x, int y) {
-  if (x == y) {
-    return 0;
-  } else {
-    return 1;
-  }
+int GetZeroOneDiff(int x, int y)
+{
+    if (x == y)
+    {
+        return 0;
+    }
+    else
+    {
+        return 1;
+    }
 }
 
-void GetMatchingPosIntVec(const int val, const vector<int> &listVals,
-                          vector<int> &listPos) {
-  listPos.clear();
-  for (int i = 0; i < (int)listVals.size(); ++i) {
-    if (val == listVals[i]) {
-      listPos.push_back(i);
+void GetMatchingPosIntVec(const int val, const vector<int> &listVals, vector<int> &listPos)
+{
+    listPos.clear();
+    for (int i = 0; i < (int)listVals.size(); ++i)
+    {
+        if (val == listVals[i])
+        {
+            listPos.push_back(i);
+        }
     }
-  }
 }
 
-void FormUnitVector(int numItems, int posUnit, vector<int> &vecUnit) {
-  //
-  YW_ASSERT_INFO(posUnit < numItems, "Wrong");
-  vecUnit.clear();
-  for (int i = 0; i < numItems; ++i) {
+void FormUnitVector(int numItems, int posUnit, vector<int> &vecUnit)
+{
     //
-    vecUnit.push_back(0);
-  }
-  vecUnit[posUnit] = 1;
+    YW_ASSERT_INFO(posUnit < numItems, "Wrong");
+    vecUnit.clear();
+    for (int i = 0; i < numItems; ++i)
+    {
+        //
+        vecUnit.push_back(0);
+    }
+    vecUnit[posUnit] = 1;
 }
 
-void FormZeroVector(int numItems, vector<int> &vecZero) {
-  //
-  vecZero.clear();
-  for (int i = 0; i < numItems; ++i) {
+void FormZeroVector(int numItems, vector<int> &vecZero)
+{
     //
-    vecZero.push_back(0);
-  }
+    vecZero.clear();
+    for (int i = 0; i < numItems; ++i)
+    {
+        //
+        vecZero.push_back(0);
+    }
 }
 
-bool AreTwoSetsCompatible(const set<int> &set1, const set<int> &set2) {
-  // are two sets either disjoint or one contins another
-  set<int> sint;
-  JoinSets(set1, set2, sint);
-  if (sint.size() == 0 || sint.size() == set1.size() ||
-      sint.size() == set2.size()) {
-    return true;
-  }
-  return false;
-}
-
-bool IsSetCompatibleWithSets(const set<int> &set1,
-                             const set<set<int> > &setSets) {
-  bool res = true;
-  for (set<set<int> >::const_iterator it = setSets.begin(); it != setSets.end();
-       ++it) {
-    if (AreTwoSetsCompatible(set1, *it) == false) {
-      res = false;
-      break;
-    }
-  }
-  return res;
-}
-
-bool AreTwoSetsCompatible(const set<int> &set1, const set<int> &set2,
-                          int numTotalElem) {
-  // are two sets either disjoint or one contins another
-  set<int> sint;
-  JoinSets(set1, set2, sint);
-  if (sint.size() == 0 || sint.size() == set1.size() ||
-      sint.size() == set2.size()) {
-    return true;
-  }
-  set<int> ssTot = set1;
-  UnionSets(ssTot, set2);
-  if ((int)ssTot.size() == numTotalElem) {
-    return true;
-  }
-  return false;
-}
-
-bool IsSetCompatibleWithSets(const set<int> &set1,
-                             const set<set<int> > &setSets, int numTotalElem) {
-  bool res = true;
-  for (set<set<int> >::const_iterator it = setSets.begin(); it != setSets.end();
-       ++it) {
-    if (AreTwoSetsCompatible(set1, *it, numTotalElem) == false) {
-      res = false;
-      break;
-    }
-  }
-  return res;
-}
-
-bool IsSignificantFraction(int totNum, int numTypes, int numOneType,
-                           double minFrac) {
-  // test whether the num of one type occupies a siinficant portion of the
-  // totNum (composed of numTypes types)
-  if (minFrac >= 0.0) {
-    return numOneType >= totNum * minFrac;
-  }
-  // if not specific fraction is givn, then use the following rule based on the
-  // number of types basicallly require appearing at least two times
-  return numOneType >= 2;
-}
-
-void IncAllNumInSet(set<int> &sint) {
-  //
-  set<int> res;
-  for (set<int>::iterator it = sint.begin(); it != sint.end(); ++it) {
-    res.insert(*it + 1);
-  }
-  sint = res;
-}
-
-void DecAllNumInSet(set<int> &sint) {
-  //
-  set<int> res;
-  for (set<int>::iterator it = sint.begin(); it != sint.end(); ++it) {
-    res.insert(*it - 1);
-  }
-  sint = res;
-}
-
-void IncAllNumInSets(set<set<int> > &setInts) {
-  //
-  set<set<int> > res;
-  for (set<set<int> >::iterator it = setInts.begin(); it != setInts.end();
-       ++it) {
-    set<int> sint = *it;
-    IncAllNumInSet(sint);
-    res.insert(sint);
-  }
-  setInts = res;
-}
-
-void GetNonZeroPosofVec(const vector<int> &vec, set<int> &setpos) {
-  //
-  setpos.clear();
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    if (vec[i] != 0) {
-      setpos.insert(i);
-    }
-  }
-}
-
-int GetSegIndex(int val, const vector<int> &listSegSizes) {
-  //
-  int res = -1;
-  int szSoFar = 0;
-  while (val >= szSoFar && res < (int)listSegSizes.size()) {
-    ++res;
-    szSoFar += listSegSizes[res];
-  }
-  return res;
+bool AreTwoSetsCompatible(const set<int> &set1, const set<int> &set2)
+{
+    // are the two sets compatible, i.e. either disjoint or one contains the other (sint is presumably their intersection; JoinSets is defined elsewhere)
+    set<int> sint;
+    JoinSets(set1, set2, sint);
+    if (sint.size() == 0 || sint.size() == set1.size() || sint.size() == set2.size())
+    {
+        return true;
+    }
+    return false;
 }
 
-// Prob related utilties
-double CalcPoisonProb(double rate, int numEvts) {
-  //
-  double res = exp(-1.0 * rate);
-  for (int i = 1; i <= numEvts; ++i) {
-    res *= rate / i;
-  }
-  return res;
-}
-
-void GetDiffPosOfTwoVec(const vector<int> &vec1, const vector<int> &vec2,
-                        set<int> &setpos) {
-  //
-  YW_ASSERT_INFO(vec1.size() == vec2.size(), "Size: mismatch");
-  setpos.clear();
-  for (int i = 0; i < (int)vec1.size(); ++i) {
-    if (vec1[i] != vec2[i]) {
-      setpos.insert(i);
-    }
-  }
-}
-
-void ComplementBoolVec(vector<bool> &listVals) {
-  // T->F and vice versa
-  for (int i = 0; i < (int)listVals.size(); ++i) {
-    if (listVals[i] == true) {
-      listVals[i] = false;
-    } else {
-      listVals[i] = true;
-    }
-  }
-}
-
-void GetAllGridPoints(int gridLB, int gridUB, int dimGrid,
-                      set<vector<int> > &setGridPts) {
-  //  get all grid points whose num is within the range [lb,ub]
-  YW_ASSERT_INFO(gridLB <= gridUB, "Bounds wrong");
-  YW_ASSERT_INFO(dimGrid >= 1, "Dimension must be positive");
-  // apply recurrence
-  setGridPts.clear();
-  if (dimGrid == 1) {
-    for (int v = gridLB; v <= gridUB; ++v) {
-      //
-      vector<int> vec;
-      vec.push_back(v);
-      setGridPts.insert(vec);
-    }
-  } else {
+bool IsSetCompatibleWithSets(const set<int> &set1, const set<set<int>> &setSets)
+{
+    bool res = true;
+    for (set<set<int>>::const_iterator it = setSets.begin(); it != setSets.end(); ++it)
+    {
+        if (AreTwoSetsCompatible(set1, *it) == false)
+        {
+            res = false;
+            break;
+        }
+    }
+    return res;
+}
+
+bool AreTwoSetsCompatible(const set<int> &set1, const set<int> &set2, int numTotalElem)
+{
+    // are the two sets compatible: either disjoint, or one contains the other, or their union has numTotalElem elements (sint is presumably the intersection; JoinSets is defined elsewhere)
+    set<int> sint;
+    JoinSets(set1, set2, sint);
+    if (sint.size() == 0 || sint.size() == set1.size() || sint.size() == set2.size())
+    {
+        return true;
+    }
+    set<int> ssTot = set1;
+    UnionSets(ssTot, set2);
+    if ((int)ssTot.size() == numTotalElem)
+    {
+        return true;
+    }
+    return false;
+}
+
+bool IsSetCompatibleWithSets(const set<int> &set1, const set<set<int>> &setSets, int numTotalElem)
+{
+    bool res = true;
+    for (set<set<int>>::const_iterator it = setSets.begin(); it != setSets.end(); ++it)
+    {
+        if (AreTwoSetsCompatible(set1, *it, numTotalElem) == false)
+        {
+            res = false;
+            break;
+        }
+    }
+    return res;
+}
+
+bool IsSignificantFraction(int totNum, int numTypes, int numOneType, double minFrac)
+{
+    // test whether the count of one type (numOneType) makes up a significant portion of totNum; numTypes is currently unused
+    if (minFrac >= 0.0)
+    {
+        return numOneType >= totNum * minFrac;
+    }
+    // if no specific fraction is given (minFrac < 0), fall back to a fixed rule:
+    // basically require the type to appear at least two times
+    return numOneType >= 2;
+}
+
+void IncAllNumInSet(set<int> &sint)
+{
     //
-    set<vector<int> > setGridPtsSmall;
-    GetAllGridPoints(gridLB, gridUB, dimGrid - 1, setGridPtsSmall);
-    for (set<vector<int> >::iterator it = setGridPtsSmall.begin();
-         it != setGridPtsSmall.end(); ++it) {
-      //
-      for (int v = gridLB; v <= gridUB; ++v) {
-        //
-        vector<int> vec = *it;
-        vec.push_back(v);
-        setGridPts.insert(vec);
-      }
-    }
-  }
-}
-
-void MapIntListToAnother(const vector<int> &vec1, const vector<int> &vec2,
-                         map<int, int> &mapVec1IndexToVec2) {
-  // given two vectors, e.g. vec1 = [2,1,3] and vec2 = [3,2,1]. Create a map
-  // from vec1's index to vec2 map = [0,1], [1,2], [2,0] we assume there is no
-  // dupllicate for now
-  // cout << "MapIntListToAnother: vec1: ";
-  // DumpIntVec(vec1);
-  // cout << "vec2: ";
-  // DumpIntVec(vec2);
-  mapVec1IndexToVec2.clear();
-  YW_ASSERT_INFO(vec1.size() == vec2.size(), "size: mismatch");
-  map<int, int> mapValToIndex1, mapValToIndex2;
-  for (int i = 0; i < (int)vec1.size(); ++i) {
+    set<int> res;
+    for (set<int>::iterator it = sint.begin(); it != sint.end(); ++it)
+    {
+        res.insert(*it + 1);
+    }
+    sint = res;
+}
+
+void DecAllNumInSet(set<int> &sint)
+{
+    //
+    set<int> res;
+    for (set<int>::iterator it = sint.begin(); it != sint.end(); ++it)
+    {
+        res.insert(*it - 1);
+    }
+    sint = res;
+}
+
+void IncAllNumInSets(set<set<int>> &setInts)
+{
+    //
+    set<set<int>> res;
+    for (set<set<int>>::iterator it = setInts.begin(); it != setInts.end(); ++it)
+    {
+        set<int> sint = *it;
+        IncAllNumInSet(sint);
+        res.insert(sint);
+    }
+    setInts = res;
+}
+
+void GetNonZeroPosofVec(const vector<int> &vec, set<int> &setpos)
+{
     //
-    YW_ASSERT_INFO(mapValToIndex1.find(vec1[i]) == mapValToIndex1.end(),
-                   "Duplicate found");
-    mapValToIndex1.insert(map<int, int>::value_type(vec1[i], i));
-    // cout << "mapValToIndex1: " << vec1[i] << ", " << i << endl;
-  }
-  for (int i = 0; i < (int)vec2.size(); ++i) {
+    setpos.clear();
+    for (int i = 0; i < (int)vec.size(); ++i)
+    {
+        if (vec[i] != 0)
+        {
+            setpos.insert(i);
+        }
+    }
+}
+
+int GetSegIndex(int val, const vector<int> &listSegSizes)
+{
     //
-    YW_ASSERT_INFO(mapValToIndex2.find(vec2[i]) == mapValToIndex2.end(),
-                   "Duplicate found");
-    mapValToIndex2.insert(map<int, int>::value_type(vec2[i], i));
-    // cout << "mapValToIndex12 " << vec2[i] << ", " << i << endl;
-  }
-  for (map<int, int>::iterator it = mapValToIndex1.begin();
-       it != mapValToIndex1.end(); ++it) {
-    YW_ASSERT_INFO(mapValToIndex2.find(it->first) != mapVec1IndexToVec2.end(),
-                   "Two lists: not idential");
-    mapVec1IndexToVec2.insert(
-        map<int, int>::value_type(it->second, mapVec1IndexToVec2[it->first]));
-  }
-}
-
-void FindEvenDistriPoints(double valMin, double valMax, double valResolution,
-                          int maxNumPoints, vector<double> &listChosenVals) {
-  // pick uniformly some number (<= maxNumPoints) of points within [valMin,
-  // valMax}, with distance no more than resolution first figure out spacing
-  double valSpacing = (valMax - valMin) / maxNumPoints;
-  if (valSpacing < valResolution) {
-    valSpacing = valResolution;
-  }
-  for (int i = 0; i < (int)(valMax - valMin) / valSpacing; ++i) {
+    int res = -1;
+    int szSoFar = 0;
+    while (val >= szSoFar && res < (int)listSegSizes.size())
+    {
+        ++res;
+        szSoFar += (res < (int)listSegSizes.size()) ? listSegSizes[res] : 0; // val past the last segment: return size() without reading listSegSizes[size()]
+    }
+    return res;
+}
+
+// Probability-related utilities
+double CalcPoisonProb(double rate, int numEvts)
+{
+    // Poisson probability of exactly numEvts events at the given rate: exp(-rate) * rate^numEvts / numEvts!
+    double res = exp(-1.0 * rate);
+    for (int i = 1; i <= numEvts; ++i)
+    {
+        res *= rate / i;
+    }
+    return res;
+}
+
+void GetDiffPosOfTwoVec(const vector<int> &vec1, const vector<int> &vec2, set<int> &setpos)
+{
     //
-    double val = (i + 0.5) * valSpacing;
-    listChosenVals.push_back(val);
-  }
+    YW_ASSERT_INFO(vec1.size() == vec2.size(), "Size: mismatch");
+    setpos.clear();
+    for (int i = 0; i < (int)vec1.size(); ++i)
+    {
+        if (vec1[i] != vec2[i])
+        {
+            setpos.insert(i);
+        }
+    }
+}
+
+void ComplementBoolVec(vector<bool> &listVals)
+{
+    // T->F and vice versa
+    for (int i = 0; i < (int)listVals.size(); ++i)
+    {
+        if (listVals[i] == true)
+        {
+            listVals[i] = false;
+        }
+        else
+        {
+            listVals[i] = true;
+        }
+    }
+}
+
+void GetAllGridPoints(int gridLB, int gridUB, int dimGrid, set<vector<int>> &setGridPts)
+{
+    //  get all grid points whose num is within the range [lb,ub]
+    YW_ASSERT_INFO(gridLB <= gridUB, "Bounds wrong");
+    YW_ASSERT_INFO(dimGrid >= 1, "Dimension must be positive");
+    // apply recurrence
+    setGridPts.clear();
+    if (dimGrid == 1)
+    {
+        for (int v = gridLB; v <= gridUB; ++v)
+        {
+            //
+            vector<int> vec;
+            vec.push_back(v);
+            setGridPts.insert(vec);
+        }
+    }
+    else
+    {
+        //
+        set<vector<int>> setGridPtsSmall;
+        GetAllGridPoints(gridLB, gridUB, dimGrid - 1, setGridPtsSmall);
+        for (set<vector<int>>::iterator it = setGridPtsSmall.begin(); it != setGridPtsSmall.end(); ++it)
+        {
+            //
+            for (int v = gridLB; v <= gridUB; ++v)
+            {
+                //
+                vector<int> vec = *it;
+                vec.push_back(v);
+                setGridPts.insert(vec);
+            }
+        }
+    }
+}
+
+void MapIntListToAnother(const vector<int> &vec1, const vector<int> &vec2, map<int, int> &mapVec1IndexToVec2)
+{
+    // Given two vectors holding the same values in different orders, e.g. vec1 = [2,1,3] and vec2 = [3,2,1],
+    // build a map from each index in vec1 to the index of the same value in vec2:
+    // map = [0,1], [1,2], [2,0]
+    // We assume there are no duplicates within either vector (asserted below).
+    // NOTE: the index in vec2 must be looked up in mapValToIndex2; reading it from the output map
+    // (mapVec1IndexToVec2) would default-insert value-keyed entries and map every index to 0, and the
+    // membership test must compare against end() of the same map that find() was called on.
+    mapVec1IndexToVec2.clear();
+    YW_ASSERT_INFO(vec1.size() == vec2.size(), "size: mismatch");
+    map<int, int> mapValToIndex1, mapValToIndex2;
+    for (int i = 0; i < (int)vec1.size(); ++i)
+    {
+        // record value -> position in vec1
+        YW_ASSERT_INFO(mapValToIndex1.find(vec1[i]) == mapValToIndex1.end(), "Duplicate found");
+        mapValToIndex1.insert(map<int, int>::value_type(vec1[i], i));
+        //cout << "mapValToIndex1: " << vec1[i] << ", " << i << endl;
+    }
+    for (int i = 0; i < (int)vec2.size(); ++i)
+    {
+        // record value -> position in vec2
+        YW_ASSERT_INFO(mapValToIndex2.find(vec2[i]) == mapValToIndex2.end(), "Duplicate found");
+        mapValToIndex2.insert(map<int, int>::value_type(vec2[i], i));
+        //cout << "mapValToIndex12 " << vec2[i] << ", " << i << endl;
+    }
+    for (map<int, int>::iterator it = mapValToIndex1.begin(); it != mapValToIndex1.end(); ++it)
+    {
+        YW_ASSERT_INFO(mapValToIndex2.find(it->first) != mapValToIndex2.end(), "Two lists: not idential");
+        mapVec1IndexToVec2.insert(map<int, int>::value_type(it->second, mapValToIndex2[it->first]));
+    }
+}
+
+void FindEvenDistriPoints(double valMin, double valMax, double valResolution, int maxNumPoints, vector<double> &listChosenVals)
+{
+    // append to listChosenVals evenly spaced values (i + 0.5) * valSpacing, where valSpacing is (valMax - valMin) / maxNumPoints raised to at least valResolution; listChosenVals is not cleared first
+    // NOTE(review): the (int) cast in the loop bound applies to (valMax - valMin) only, so the range is truncated to an integer before the division; confirm this is intended
+    double valSpacing = (valMax - valMin) / maxNumPoints;
+    if (valSpacing < valResolution)
+    {
+        valSpacing = valResolution;
+    }
+    for (int i = 0; i < (int)(valMax - valMin) / valSpacing; ++i)
+    {
+        // NOTE(review): val is measured from 0, not from valMin; confirm callers expect offsets rather than values inside [valMin, valMax]
+        double val = (i + 0.5) * valSpacing;
+        listChosenVals.push_back(val);
+    }
 }
 
 // bits operation
-bool IsBitSetInt(int val, int posBit) {
-  //
-  // for an index of AC, which src populaiton is a leave
-  int mask = (0x1 << posBit);
-  // assume only two populaitons for now
-  bool res = false;
-  if ((val & mask) != 0) {
-    res = true;
-  }
-  return res;
-}
-
-int ToggleBitInt(int val, int posBit) {
-  //
-  return val ^ (1 << posBit);
-}
-
-double StrToDouble(const string &s) {
-  double d;
-  stringstream ss(s); // turn the string into a stream
-  ss >> d;            // convert
-  return d;
-}
-
-double CalcProductBetween(int lb, int ub) {
-  double res = 1.0;
-  for (int i = lb; i <= ub; ++i) {
-    res *= i;
-  }
-  return res;
-}
-
-void CreateClustersFromMultisets(
-    const multiset<multiset<int> > &setMultisets,
-    map<multiset<int>, vector<multiset<int> > > &mapMultisetClusters) {
-  cout << "CreateClustersFromMultisets: DONOT WORK YET\n";
-  // give multisets S1, S2, .... Sn
-  // find the ancestral (clustering) relations among them
-  // i.e. if S1 contains S2 and S3 (as the smallest enclosing), then we have: S1
-  // (S2, S3) and so on
-  mapMultisetClusters.clear();
-
-  // this refers to the smallest container set
-  map<multiset<int>, multiset<int> > mapSmallestContainer;
-
-  //
-  // YW: TBD: issue: there may be duplicate clusters
-  // TBDDDDDDDDDDDDDDDDDDD
-
-  for (multiset<multiset<int> >::const_iterator it1 = setMultisets.begin();
-       it1 != setMultisets.end(); ++it1) {
+bool IsBitSetInt(int val, int posBit)
+{
+    // test whether bit number posBit (0 = least significant) of val is set
+    // for an index of AC, which src population is a leave
+    int mask = (0x1 << posBit);
+    // assume only two populations for now
+    bool res = false;
+    if ((val & mask) != 0)
+    {
+        res = true;
+    }
+    return res;
+}
+
+int ToggleBitInt(int val, int posBit)
+{
+    //
+    return val ^ (1 << posBit);
+}
+
+double StrToDouble(const string &s)
+{
+    double d;
+    stringstream ss(s); //turn the string into a stream
+    ss >> d;            //convert
+    return d;
+}
+
+double CalcProductBetween(int lb, int ub)
+{
+    double res = 1.0;
+    for (int i = lb; i <= ub; ++i)
+    {
+        res *= i;
+    }
+    return res;
+}
+
+void CreateClustersFromMultisets(const multiset<multiset<int>> &setMultisets, map<multiset<int>, vector<multiset<int>>> &mapMultisetClusters)
+{
+    cout << "CreateClustersFromMultisets: DONOT WORK YET\n";
+    // give multisets S1, S2, .... Sn
+    // find the ancestral (clustering) relations among them
+    // i.e. if S1 contains S2 and S3 (as the smallest enclosing), then we have: S1 (S2, S3) and so on
+    mapMultisetClusters.clear();
+
+    // this refers to the smallest container set
+    map<multiset<int>, multiset<int>> mapSmallestContainer;
+
     //
-    for (multiset<multiset<int> >::const_iterator it2 = setMultisets.begin();
-         it2 != setMultisets.end(); ++it2) {
-      //
-      if (it1 == it2) {
-        continue;
-      }
-
-      // is s1 contained by s2? and also cannot allow the two becomes the same
-      if (IsMultisetContainedIn(*it1, *it2) == true &&
-          it1->size() < it2->size()) {
+    //YW: TBD: issue: there may be duplicate clusters
+    //TBDDDDDDDDDDDDDDDDDDD
+
+    for (multiset<multiset<int>>::const_iterator it1 = setMultisets.begin(); it1 != setMultisets.end(); ++it1)
+    {
         //
-        if (mapSmallestContainer.find(*it1) == mapSmallestContainer.end()) {
-          mapSmallestContainer.insert(
-              map<multiset<int>, multiset<int> >::value_type(*it1, *it2));
-        } else if (mapSmallestContainer[*it1].size() > it2->size()) {
-          mapSmallestContainer[*it1] = *it2;
+        for (multiset<multiset<int>>::const_iterator it2 = setMultisets.begin(); it2 != setMultisets.end(); ++it2)
+        {
+            //
+            if (it1 == it2)
+            {
+                continue;
+            }
+
+            // is s1 contained by s2? and also cannot allow the two becomes the same
+            if (IsMultisetContainedIn(*it1, *it2) == true && it1->size() < it2->size())
+            {
+                //
+                if (mapSmallestContainer.find(*it1) == mapSmallestContainer.end())
+                {
+                    mapSmallestContainer.insert(map<multiset<int>, multiset<int>>::value_type(*it1, *it2));
+                }
+                else if (mapSmallestContainer[*it1].size() > it2->size())
+                {
+                    mapSmallestContainer[*it1] = *it2;
+                }
+            }
         }
-      }
     }
-  }
-  cout << "here...\n";
-  // now from the smallest container, create the clusters
-  for (map<multiset<int>, multiset<int> >::iterator it =
-           mapSmallestContainer.begin();
-       it != mapSmallestContainer.end(); ++it) {
-    if (mapMultisetClusters.find(it->second) == mapMultisetClusters.end()) {
-      vector<multiset<int> > listMSs;
-      mapMultisetClusters.insert(
-          map<multiset<int>, vector<multiset<int> > >::value_type(it->second,
-                                                                  listMSs));
+    cout << "here...\n";
+    // now from the smallest container, create the clusters
+    for (map<multiset<int>, multiset<int>>::iterator it = mapSmallestContainer.begin(); it != mapSmallestContainer.end(); ++it)
+    {
+        if (mapMultisetClusters.find(it->second) == mapMultisetClusters.end())
+        {
+            vector<multiset<int>> listMSs;
+            mapMultisetClusters.insert(map<multiset<int>, vector<multiset<int>>>::value_type(it->second, listMSs));
+        }
+        mapMultisetClusters[it->second].push_back(it->first);
     }
-    mapMultisetClusters[it->second].push_back(it->first);
-  }
 }
 
-void CountMultiset(const multiset<int> &s1, map<int, int> &msMap) {
-  for (multiset<int>::const_iterator it = s1.begin(); it != s1.end(); ++it) {
-    if (msMap.find(*it) == msMap.end()) {
-      msMap.insert(map<int, int>::value_type(*it, 0));
+void CountMultiset(const multiset<int> &s1, map<int, int> &msMap)
+{
+    for (multiset<int>::const_iterator it = s1.begin(); it != s1.end(); ++it)
+    {
+        if (msMap.find(*it) == msMap.end())
+        {
+            msMap.insert(map<int, int>::value_type(*it, 0));
+        }
+        ++msMap[*it];
     }
-    ++msMap[*it];
-  }
 }
 
-bool IsMultisetContainedIn(const multiset<int> &s1, const multiset<int> &s2) {
-  map<int, int> msMap1, msMap2;
-  CountMultiset(s1, msMap1);
-  CountMultiset(s2, msMap2);
-  for (map<int, int>::iterator it1 = msMap1.begin(); it1 != msMap1.end();
-       ++it1) {
-    if (msMap2.find(it1->first) == msMap2.end() ||
-        it1->second > msMap2[it1->first]) {
-      return false;
+bool IsMultisetContainedIn(const multiset<int> &s1, const multiset<int> &s2)
+{
+    map<int, int> msMap1, msMap2;
+    CountMultiset(s1, msMap1);
+    CountMultiset(s2, msMap2);
+    for (map<int, int>::iterator it1 = msMap1.begin(); it1 != msMap1.end(); ++it1)
+    {
+        if (msMap2.find(it1->first) == msMap2.end() || it1->second > msMap2[it1->first])
+        {
+            return false;
+        }
     }
-  }
-  return true;
+    return true;
 }
 
-void DumpIntMultiset(const multiset<int> &ms) {
-  for (multiset<int>::const_iterator it = ms.begin(); it != ms.end(); ++it) {
-    cout << *it << "  ";
-  }
-  cout << endl;
+void DumpIntMultiset(const multiset<int> &ms)
+{
+    for (multiset<int>::const_iterator it = ms.begin(); it != ms.end(); ++it)
+    {
+        cout << *it << "  ";
+    }
+    cout << endl;
 }
 
-void OutputStringsToFile(const char *filename,
-                         const vector<string> &listStrsOut) {
-  ofstream outFile(filename);
-  if (outFile.is_open() == false) {
-    cout << "Fatal error: Can not open output file: " << filename << endl;
-    exit(1);
-  }
+void OutputStringsToFile(const char *filename, const vector<string> &listStrsOut)
+{
+    ofstream outFile(filename);
+    if (outFile.is_open() == false)
+    {
+        cout << "Fatal error: Can not open output file: " << filename << endl;
+        exit(1);
+    }
 
-  for (int i = 0; i < (int)listStrsOut.size(); ++i) {
-    outFile << listStrsOut[i] << endl;
-  }
-  outFile.close();
+    for (int i = 0; i < (int)listStrsOut.size(); ++i)
+    {
+        outFile << listStrsOut[i] << endl;
+    }
+    outFile.close();
 }
 
-unsigned int ConvVecToIntGen(const vector<int> &vec, int base) {
-  // assume vec[0] is least siginicant
-  unsigned int res = 0;
+unsigned int ConvVecToIntGen(const vector<int> &vec, int base)
+{
+    // assume vec[0] is least siginicant
+    unsigned int res = 0;
 
-  for (int i = (int)vec.size() - 1; i >= 0; --i) {
-    YW_ASSERT_INFO(vec[i] >= 0 && vec[i] < base,
-                   "In ConvVecToIntGen, vector value overflow.");
-    // cout << "res = " << res << endl;
+    for (int i = (int)vec.size() - 1; i >= 0; --i)
+    {
+        YW_ASSERT_INFO(vec[i] >= 0 && vec[i] < base, "In ConvVecToIntGen, vector value overflow.");
+        //cout << "res = " << res << endl;
 
-    res += vec[i];
-    if (i > 0) {
-      res = res * base;
+        res += vec[i];
+        if (i > 0)
+        {
+            res = res * base;
+        }
     }
-  }
 
-  return res;
+    return res;
 }
 
-unsigned int ConvVecToIntGenMSB(const vector<int> &vec, int base) {
-  vector<int> vecMSB = vec;
-  // cout << "vec = ";
-  // DumpIntVec( vec );
-  ReverseIntVec(vecMSB);
-  // cout << "vec = ";
-  // DumpIntVec( vec );
-  return ConvVecToIntGen(vecMSB, base);
+unsigned int ConvVecToIntGenMSB(const vector<int> &vec, int base)
+{
+    vector<int> vecMSB = vec;
+    //cout << "vec = ";
+    //DumpIntVec( vec );
+    ReverseIntVec(vecMSB);
+    //cout << "vec = ";
+    //DumpIntVec( vec );
+    return ConvVecToIntGen(vecMSB, base);
 }
 
-int ConvVecToIntGenBounds(const vector<int> &vec, const vector<int> &bounds) {
-  // bound[i]: the largest value a digit can reach at position i
-  // assume vec[0] is least siginicant
-  unsigned int res = 0;
+int ConvVecToIntGenBounds(const vector<int> &vec, const vector<int> &bounds)
+{
+    // bound[i]: the largest value a digit can reach at position i
+    // assume vec[0] is least siginicant
+    unsigned int res = 0;
 
-  for (int i = (int)vec.size() - 1; i >= 0; --i) {
-    YW_ASSERT_INFO(vec[i] >= 0 && vec[i] <= bounds[i],
-                   "In ConvVecToIntGen, vector value overflow.");
-    // cout << "res = " << res << endl;
+    for (int i = (int)vec.size() - 1; i >= 0; --i)
+    {
+        YW_ASSERT_INFO(vec[i] >= 0 && vec[i] <= bounds[i], "In ConvVecToIntGen, vector value overflow.");
+        //cout << "res = " << res << endl;
 
-    res += vec[i];
-    if (i > 0) {
-      res = res * (bounds[i - 1] + 1);
+        res += vec[i];
+        if (i > 0)
+        {
+            res = res * (bounds[i - 1] + 1);
+        }
     }
-  }
 
-  return res;
+    return res;
 }
 
-void ConvIntToVecGen(int val, const vector<int> &bounds, vector<int> &vec) {
-  vec.clear();
+void ConvIntToVecGen(int val, const vector<int> &bounds, vector<int> &vec)
+{
+    vec.clear();
 
-  int numBits = bounds.size();
-  YW_ASSERT_INFO(numBits < 30, "Overflow000");
+    int numBits = bounds.size();
+    YW_ASSERT_INFO(numBits < 30, "Overflow000");
 
-  // we would store the least significant bit as vec[0]
-  for (int i = 0; i < numBits; ++i) {
-    int bound0 = bounds[i];
-    YW_ASSERT_INFO(bound0 >= 0, "Cannot be too small");
-    int val2 = val % (bound0 + 1);
-    vec.push_back(val2);
-    val = (val - val2) / (bound0 + 1);
-  }
+    // we would store the least significant bit as vec[0]
+    for (int i = 0; i < numBits; ++i)
+    {
+        int bound0 = bounds[i];
+        YW_ASSERT_INFO(bound0 >= 0, "Cannot be too small");
+        int val2 = val % (bound0 + 1);
+        vec.push_back(val2);
+        val = (val - val2) / (bound0 + 1);
+    }
 }
 
-int ConvRowMajorPosVecToIntGenBounds(const vector<int> &vec,
-                                     const vector<int> &bounds) {
-  // different from above: bound b means that max value is actaully b-1 (like
-  // those) bound[i]: the largest value a digit can reach at position i assume
-  // vec[0] is least siginicant
-  unsigned int res = 0;
+int ConvRowMajorPosVecToIntGenBounds(const vector<int> &vec, const vector<int> &bounds)
+{
+    // different from above: bound b means that max value is actaully b-1 (like those)
+    // bound[i]: the largest value a digit can reach at position i
+    // assume vec[0] is least siginicant
+    unsigned int res = 0;
 
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    if (i > 0) {
-      res = res * (bounds[i]);
+    for (int i = 0; i < (int)vec.size(); ++i)
+    {
+        if (i > 0)
+        {
+            res = res * (bounds[i]);
+        }
+        YW_ASSERT_INFO(vec[i] >= 0 && vec[i] <= bounds[i], "In ConvVecToIntGen, vector value overflow.");
+        //cout << "res = " << res << endl;
+        res += vec[i];
     }
-    YW_ASSERT_INFO(vec[i] >= 0 && vec[i] <= bounds[i],
-                   "In ConvVecToIntGen, vector value overflow.");
-    // cout << "res = " << res << endl;
-    res += vec[i];
-  }
 
-  return res;
+    return res;
 }
 
-void ConvRowMajorIntPosToVecGen(int val, const vector<int> &bounds,
-                                vector<int> &vec) {
-  //
-  vec.clear();
+void ConvRowMajorIntPosToVecGen(int val, const vector<int> &bounds, vector<int> &vec)
+{
+    //
+    vec.clear();
 
-  int numBits = bounds.size();
-  YW_ASSERT_INFO(numBits < 30, "Overflow000");
+    int numBits = bounds.size();
+    YW_ASSERT_INFO(numBits < 30, "Overflow000");
 
-  // we would store the least significant bit as vec[0]
-  for (int i = numBits - 1; i >= 0; --i) {
-    int bound0 = bounds[i];
-    YW_ASSERT_INFO(bound0 >= 1, "Cannot be too small");
-    int val2 = val % (bound0);
-    vec.push_back(val2);
-    val = (val - val2) / (bound0);
-  }
-  ReverseIntVec(vec);
+    // we would store the least significant bit as vec[0]
+    for (int i = numBits - 1; i >= 0; --i)
+    {
+        int bound0 = bounds[i];
+        YW_ASSERT_INFO(bound0 >= 1, "Cannot be too small");
+        int val2 = val % (bound0);
+        vec.push_back(val2);
+        val = (val - val2) / (bound0);
+    }
+    ReverseIntVec(vec);
 }
 
 // utility
-class ClusterPosition {
+class ClusterPosition
+{
 public:
-  ClusterPosition() { pos = 0; }
-  ClusterPosition(const ClusterPosition &rhs) : pos(rhs.pos) {}
-  ClusterPosition(int posIn) { pos = posIn; }
-  int GetPosition() const { return pos; }
+    ClusterPosition() { pos = 0; }
+    ClusterPosition(const ClusterPosition &rhs) : pos(rhs.pos) {}
+    ClusterPosition(int posIn) { pos = posIn; }
+    int GetPosition() const { return pos; }
 
 private:
-  int pos;
+    int pos;
 };
 
-void ClusterLinearPoints(const vector<double> &listPoints,
-                         double ratioMaxInOutCmp, vector<int> &listBkpts) {
-  // assume points are sorted!!!
-  if (listPoints.size() <= 1) {
-    // nothing to cluster
-    return;
-  }
-
-  // rationInOutCmp: the max ratio btwn inside cluster and outside cluster that
-  // we will merge two groups
-  map<pair<int, int>, double> mapClusterInfo; // current max distance within
-                                              // group
-  map<int, pair<int, int> > mapPointMembership;
-
-  // init each point to self
-  for (int i = 0; i < (int)listPoints.size(); ++i) {
-    pair<int, int> pp(i, i);
-    mapClusterInfo.insert(map<pair<int, int>, double>::value_type(pp, 0.0));
-    mapPointMembership.insert(map<int, pair<int, int> >::value_type(i, pp));
-  }
-  // sort the values
-  vector<ClusterPosition> vecPosRecords;
-  for (int i = 0; i < (int)listPoints.size(); ++i) {
-    ClusterPosition cp(i);
-    vecPosRecords.push_back(cp);
-  }
-  vector<pair<double, void *> > listPointsSortedWithPos;
-  for (int i = 0; i < (int)listPoints.size() - 1; ++i) {
-    pair<double, void *> pp(listPoints[i + 1] - listPoints[i],
-                            &vecPosRecords[i]);
-    listPointsSortedWithPos.push_back(pp);
-  }
-  SortPairsByNumsDouble(listPointsSortedWithPos);
-  for (int i = 0; i < (int)listPointsSortedWithPos.size(); ++i) {
-    //
-    double diststep = listPointsSortedWithPos[i].first;
-    ClusterPosition *ptr =
-        (ClusterPosition *)(listPointsSortedWithPos[i].second);
-    int pos = ptr->GetPosition();
-    int posNext = pos + 1;
-    YW_ASSERT_INFO(mapPointMembership.find(pos) != mapPointMembership.end(),
-                   "Fail");
-    YW_ASSERT_INFO(mapPointMembership.find(posNext) != mapPointMembership.end(),
-                   "Fail");
-    pair<int, int> pp1 = mapPointMembership[pos];
-    pair<int, int> pp2 = mapPointMembership[posNext];
-    // should we merge the two; do so if the current distance
-    bool fMerge1 = true;
-    if (pp1.second > pp1.first) {
-      YW_ASSERT_INFO(mapClusterInfo.find(pp1) != mapClusterInfo.end(),
-                     "Fail to find");
-      double distCur = mapClusterInfo[pp1];
-      if (diststep <= distCur * ratioMaxInOutCmp) {
-        fMerge1 = true;
-      } else {
-        fMerge1 = false;
-      }
-    }
-    bool fMerge2 = true;
-    if (pp2.second > pp2.first) {
-      YW_ASSERT_INFO(mapClusterInfo.find(pp2) != mapClusterInfo.end(),
-                     "Fail to find");
-      double distCur = mapClusterInfo[pp2];
-      if (diststep <= distCur * ratioMaxInOutCmp) {
-        fMerge2 = true;
-      } else {
-        fMerge2 = false;
-      }
-    }
-    if (fMerge1 && fMerge2) {
-      cout << "Merging: (" << pp1.first << ", " << pp1.second << "): and ("
-           << pp2.first << "," << pp2.second << ")\n";
-      // merge
-      pair<int, int> ppnew(pp1.first, pp2.second);
-      double distMaxNew = std::max(
-          diststep, std::max(mapClusterInfo[pp1], mapClusterInfo[pp2]));
-      mapClusterInfo.insert(
-          map<pair<int, int>, double>::value_type(ppnew, distMaxNew));
-      mapClusterInfo.erase(pp1);
-      mapClusterInfo.erase(pp2);
-      for (int s = ppnew.first; s <= ppnew.second; ++s) {
-        mapPointMembership.erase(s);
-        mapPointMembership.insert(
-            map<int, pair<int, int> >::value_type(s, ppnew));
-      }
-    }
-  }
-  // now insert all segments
-  for (map<pair<int, int>, double>::iterator it = mapClusterInfo.begin();
-       it != mapClusterInfo.end(); ++it) {
-    int bkptRight = it->first.second;
-    if (bkptRight < (int)listPoints.size() - 1) {
-      listBkpts.push_back(bkptRight);
-    }
-  }
-}
-
-void FindConsecutiveIntervals(const set<int> &setItems,
-                              vector<pair<int, int> > &listIVs) {
-  listIVs.clear();
-  if (setItems.size() == 0) {
-    return;
-  }
-  int itemStart = *setItems.begin();
-  int itemPrev = itemStart;
-  set<int>::const_iterator it = setItems.begin();
-  ++it;
-  while (it != setItems.end()) {
-    if (*it != itemPrev + 1) {
-      // this is an IV
-      pair<int, int> pp(itemStart, itemPrev);
-      listIVs.push_back(pp);
-      itemStart = *it;
-    }
-
-    itemPrev = *it;
+void ClusterLinearPoints(const vector<double> &listPoints, double ratioMaxInOutCmp, vector<int> &listBkpts)
+{
+    // assume points are sorted!!!
+    if (listPoints.size() <= 1)
+    {
+        // nothing to cluster
+        return;
+    }
+
+    // rationInOutCmp: the max ratio btwn inside cluster and outside cluster that we will merge two groups
+    map<pair<int, int>, double> mapClusterInfo; // current max distance within group
+    map<int, pair<int, int>> mapPointMembership;
+
+    // init each point to self
+    for (int i = 0; i < (int)listPoints.size(); ++i)
+    {
+        pair<int, int> pp(i, i);
+        mapClusterInfo.insert(map<pair<int, int>, double>::value_type(pp, 0.0));
+        mapPointMembership.insert(map<int, pair<int, int>>::value_type(i, pp));
+    }
+    // sort the values
+    vector<ClusterPosition> vecPosRecords;
+    for (int i = 0; i < (int)listPoints.size(); ++i)
+    {
+        ClusterPosition cp(i);
+        vecPosRecords.push_back(cp);
+    }
+    vector<pair<double, void *>> listPointsSortedWithPos;
+    for (int i = 0; i < (int)listPoints.size() - 1; ++i)
+    {
+        pair<double, void *> pp(listPoints[i + 1] - listPoints[i], &vecPosRecords[i]);
+        listPointsSortedWithPos.push_back(pp);
+    }
+    SortPairsByNumsDouble(listPointsSortedWithPos);
+    for (int i = 0; i < (int)listPointsSortedWithPos.size(); ++i)
+    {
+        //
+        double diststep = listPointsSortedWithPos[i].first;
+        ClusterPosition *ptr = (ClusterPosition *)(listPointsSortedWithPos[i].second);
+        int pos = ptr->GetPosition();
+        int posNext = pos + 1;
+        YW_ASSERT_INFO(mapPointMembership.find(pos) != mapPointMembership.end(), "Fail");
+        YW_ASSERT_INFO(mapPointMembership.find(posNext) != mapPointMembership.end(), "Fail");
+        pair<int, int> pp1 = mapPointMembership[pos];
+        pair<int, int> pp2 = mapPointMembership[posNext];
+        // should we merge the two; do so if the current distance
+        bool fMerge1 = true;
+        if (pp1.second > pp1.first)
+        {
+            YW_ASSERT_INFO(mapClusterInfo.find(pp1) != mapClusterInfo.end(), "Fail to find");
+            double distCur = mapClusterInfo[pp1];
+            if (diststep <= distCur * ratioMaxInOutCmp)
+            {
+                fMerge1 = true;
+            }
+            else
+            {
+                fMerge1 = false;
+            }
+        }
+        bool fMerge2 = true;
+        if (pp2.second > pp2.first)
+        {
+            YW_ASSERT_INFO(mapClusterInfo.find(pp2) != mapClusterInfo.end(), "Fail to find");
+            double distCur = mapClusterInfo[pp2];
+            if (diststep <= distCur * ratioMaxInOutCmp)
+            {
+                fMerge2 = true;
+            }
+            else
+            {
+                fMerge2 = false;
+            }
+        }
+        if (fMerge1 && fMerge2)
+        {
+            cout << "Merging: (" << pp1.first << ", " << pp1.second << "): and (" << pp2.first << "," << pp2.second << ")\n";
+            // merge
+            pair<int, int> ppnew(pp1.first, pp2.second);
+            double distMaxNew = std::max(diststep, std::max(mapClusterInfo[pp1], mapClusterInfo[pp2]));
+            mapClusterInfo.insert(map<pair<int, int>, double>::value_type(ppnew, distMaxNew));
+            mapClusterInfo.erase(pp1);
+            mapClusterInfo.erase(pp2);
+            for (int s = ppnew.first; s <= ppnew.second; ++s)
+            {
+                mapPointMembership.erase(s);
+                mapPointMembership.insert(map<int, pair<int, int>>::value_type(s, ppnew));
+            }
+        }
+    }
+    // now insert all segments
+    for (map<pair<int, int>, double>::iterator it = mapClusterInfo.begin(); it != mapClusterInfo.end(); ++it)
+    {
+        int bkptRight = it->first.second;
+        if (bkptRight < (int)listPoints.size() - 1)
+        {
+            listBkpts.push_back(bkptRight);
+        }
+    }
+}
+
+void FindConsecutiveIntervals(const set<int> &setItems, vector<pair<int, int>> &listIVs)
+{
+    listIVs.clear();
+    if (setItems.size() == 0)
+    {
+        return;
+    }
+    int itemStart = *setItems.begin();
+    int itemPrev = itemStart;
+    set<int>::const_iterator it = setItems.begin();
     ++it;
-    if (it == setItems.end()) {
-      // ouput the prev
-      pair<int, int> pp(itemStart, itemPrev);
-      listIVs.push_back(pp);
-    }
-  }
-}
-
-void ComplementIntSet(int numTot, set<int> &setToComp) {
-  // YW: assume numbers start from 0 to numTot-1
-  set<int> ssTot;
-  PopulateSetWithInterval(ssTot, 0, numTot - 1);
-  SubtractSets(ssTot, setToComp);
-  setToComp = ssTot;
-}
-
-void GetCountsItems(int range, const set<int> &listNumbers,
-                    vector<int> &listCnts) {
-  // count occurance of numbers: listCnts[k] = # of items that is smaller or
-  // equal to k in the set
-  YW_ASSERT_INFO(range >= 0, "Must be positive");
-  listCnts.clear();
-  listCnts.resize(range + 1);
-  int cntTot = 0;
-  int posLast = -1;
-  for (set<int>::const_iterator it = listNumbers.begin();
-       it != listNumbers.end(); ++it) {
-    int val = *it;
-    YW_ASSERT_INFO(val <= range, "Wrong");
-    for (int i = posLast + 1; i < val; ++i) {
-      listCnts[i] = cntTot;
-    }
-    ++cntTot;
-    listCnts[val] = cntTot;
-    posLast = val;
-  }
-}
-
-void FindGapBlocksWithinPosVec(const vector<int> &posvec, int numItemsEnum,
-                               int numItemsGap,
-                               vector<pair<int, int> > &listSegs) {
-  // in a position vector (i.e. subset of positions 0, 1, ..., k; find gaps in
-  // between the chosen positions gaps are re-ordered to consecutive from 0, 1,
-  // ...
-  listSegs.clear();
-  vector<int> listGapLens;
-  for (int i = 0; i < (int)posvec.size(); ++i) {
+    while (it != setItems.end())
+    {
+        if (*it != itemPrev + 1)
+        {
+            // this is an IV
+            pair<int, int> pp(itemStart, itemPrev);
+            listIVs.push_back(pp);
+            itemStart = *it;
+        }
+
+        itemPrev = *it;
+        ++it;
+        if (it == setItems.end())
+        {
+            // ouput the prev
+            pair<int, int> pp(itemStart, itemPrev);
+            listIVs.push_back(pp);
+        }
+    }
+}
+
+void ComplementIntSet(int numTot, set<int> &setToComp)
+{
+    // YW: assume numbers start from 0 to numTot-1
+    set<int> ssTot;
+    PopulateSetWithInterval(ssTot, 0, numTot - 1);
+    SubtractSets(ssTot, setToComp);
+    setToComp = ssTot;
+}
+
+void GetCountsItems(int range, const set<int> &listNumbers, vector<int> &listCnts)
+{
+    // count occurance of numbers: listCnts[k] = # of items that is smaller or equal to k in the set
+    YW_ASSERT_INFO(range >= 0, "Must be positive");
+    listCnts.clear();
+    listCnts.resize(range + 1);
+    int cntTot = 0;
     int posLast = -1;
-    if (i > 0) {
-      posLast = posvec[i - 1];
-    }
-    int len = posvec[i] - posLast - 1;
-    listGapLens.push_back(len);
-  }
-  // cout << "numItemsEnum: " << numItemsEnum << ", listGapLens: ";
-  // DumpIntVec(listGapLens);
-  // last segment
-  int posFinal = numItemsEnum + numItemsGap - 1;
-  int posFirst = 0;
-  if (posvec.size() > 0) {
-    posFirst = posvec[posvec.size() - 1];
-  } else {
-    posFinal = numItemsGap;
-  }
-  int lenFinal = posFinal - posFirst;
-  // cout << "posFirst: " << posFirst << ", posFinal: " << posFinal << ",
-  // lenFinal: " << lenFinal << endl;
-  YW_ASSERT_INFO(lenFinal >= 0, "Cannot be negative");
-  listGapLens.push_back(lenFinal);
-  int posCur = 0;
-  for (int i = 0; i < (int)listGapLens.size(); ++i) {
-    pair<int, int> pp;
-    pp.first = posCur;
-    pp.second = posCur + listGapLens[i];
-
-    if (pp.first > numItemsGap) {
-      pp.first = -1;
-    }
-    if (pp.second > numItemsGap) {
-      pp.second = -1;
-    }
-
-    listSegs.push_back(pp);
-
-    // note: consecutive IV overlaps
-    posCur = pp.second;
-  }
-}
-
-void GetSetsIntParts(const set<int> &set1, const set<int> &set2,
-                     const set<int> &setAll, set<int> &set1Only,
-                     set<int> &set2Only, set<int> &set12, set<int> &setNone) {
-  //
-  set1Only = set1;
-  SubtractSets(set1Only, set2);
-  set2Only = set2;
-  SubtractSets(set2Only, set1);
-  set12 = set1;
-  UnionSets(set12, set2);
-  setNone = setAll;
-  SubtractSets(setNone, set12);
+    for (set<int>::const_iterator it = listNumbers.begin(); it != listNumbers.end(); ++it)
+    {
+        int val = *it;
+        YW_ASSERT_INFO(val <= range, "Wrong");
+        for (int i = posLast + 1; i < val; ++i)
+        {
+            listCnts[i] = cntTot;
+        }
+        ++cntTot;
+        listCnts[val] = cntTot;
+        posLast = val;
+    }
+}
+
+void FindGapBlocksWithinPosVec(const vector<int> &posvec, int numItemsEnum, int numItemsGap, vector<pair<int, int>> &listSegs)
+{
+    // in a position vector (i.e. subset of positions 0, 1, ..., k; find gaps in between the chosen positions
+    // gaps are re-ordered to consecutive from 0, 1, ...
+    listSegs.clear();
+    vector<int> listGapLens;
+    for (int i = 0; i < (int)posvec.size(); ++i)
+    {
+        int posLast = -1;
+        if (i > 0)
+        {
+            posLast = posvec[i - 1];
+        }
+        int len = posvec[i] - posLast - 1;
+        listGapLens.push_back(len);
+    }
+    //cout << "numItemsEnum: " << numItemsEnum << ", listGapLens: ";
+    //DumpIntVec(listGapLens);
+    // last segment
+    int posFinal = numItemsEnum + numItemsGap - 1;
+    int posFirst = 0;
+    if (posvec.size() > 0)
+    {
+        posFirst = posvec[posvec.size() - 1];
+    }
+    else
+    {
+        posFinal = numItemsGap;
+    }
+    int lenFinal = posFinal - posFirst;
+    //cout << "posFirst: " << posFirst << ", posFinal: " << posFinal << ", lenFinal: " << lenFinal << endl;
+    YW_ASSERT_INFO(lenFinal >= 0, "Cannot be negative");
+    listGapLens.push_back(lenFinal);
+    int posCur = 0;
+    for (int i = 0; i < (int)listGapLens.size(); ++i)
+    {
+        pair<int, int> pp;
+        pp.first = posCur;
+        pp.second = posCur + listGapLens[i];
+
+        if (pp.first > numItemsGap)
+        {
+            pp.first = -1;
+        }
+        if (pp.second > numItemsGap)
+        {
+            pp.second = -1;
+        }
+
+        listSegs.push_back(pp);
+
+        // note: consecutive IV overlaps
+        posCur = pp.second;
+    }
+}
+
+void GetSetsIntParts(const set<int> &set1, const set<int> &set2, const set<int> &setAll, set<int> &set1Only, set<int> &set2Only, set<int> &set12, set<int> &setNone)
+{
+    //
+    set1Only = set1;
+    SubtractSets(set1Only, set2);
+    set2Only = set2;
+    SubtractSets(set2Only, set1);
+    set12 = set1;
+    UnionSets(set12, set2);
+    setNone = setAll;
+    SubtractSets(setNone, set12);
 }
diff --git a/trisicell/external/scistree/Utils4.h b/trisicell/external/scistree/Utils4.h
index 3410aba..dd9276b 100644
--- a/trisicell/external/scistree/Utils4.h
+++ b/trisicell/external/scistree/Utils4.h
@@ -10,748 +10,816 @@
 #define ____Utils4__
 
 #include "Utils3.h"
-#include <algorithm>
 #include <cmath>
-#include <queue>
-#include <set>
+#include <algorithm>
 #include <sstream>
 #include <string>
+#include <queue>
+#include <set>
 
 #define YW_VERY_SMALL_FRACTION 0.000000000001
 
 // a list of templates
 template <class TYPE1, class TYPE2>
-void CreateMapFromTwoVec(const vector<TYPE1> &vecKey,
-                         const vector<TYPE2> &vecval,
-                         map<TYPE1, TYPE2> &mapCreated) {
-  //
-  YW_ASSERT_INFO(vecKey.size() == vecval.size(),
-                 "veckey has different size as vecval");
-  mapCreated.clear();
-  for (int i = 0; i < (int)vecKey.size(); ++i) {
+void CreateMapFromTwoVec(const vector<TYPE1> &vecKey, const vector<TYPE2> &vecval, map<TYPE1, TYPE2> &mapCreated)
+{
     //
-    mapCreated.insert(
-        typename map<TYPE1, TYPE2>::value_type(vecKey[i], vecval[i]));
-  }
+    YW_ASSERT_INFO(vecKey.size() == vecval.size(), "veckey has different size as vecval");
+    mapCreated.clear();
+    for (int i = 0; i < (int)vecKey.size(); ++i)
+    {
+        //
+        mapCreated.insert(typename map<TYPE1, TYPE2>::value_type(vecKey[i], vecval[i]));
+    }
 }
 
 template <class TYPE1, class TYPE2>
-void KeepCommonInMaps(map<TYPE1, TYPE2> &mapSubtracted,
-                      const map<TYPE1, TYPE2> &mapToSub) {
-  // only keep those that is also in the second map
-  map<TYPE1, TYPE2> mapNew;
-  for (typename map<TYPE1, TYPE2>::iterator it = mapSubtracted.begin();
-       it != mapSubtracted.end(); ++it) {
-    //
-    if (mapToSub.find(it->first) != mapToSub.end()) {
-      // appear in second map so keep
-      mapNew.insert(
-          typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+void KeepCommonInMaps(map<TYPE1, TYPE2> &mapSubtracted, const map<TYPE1, TYPE2> &mapToSub)
+{
+    // only keep those that is also in the second map
+    map<TYPE1, TYPE2> mapNew;
+    for (typename map<TYPE1, TYPE2>::iterator it = mapSubtracted.begin(); it != mapSubtracted.end(); ++it)
+    {
+        //
+        if (mapToSub.find(it->first) != mapToSub.end())
+        {
+            // appear in second map so keep
+            mapNew.insert(typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+        }
     }
-  }
-  mapSubtracted = mapNew;
+    mapSubtracted = mapNew;
 }
 
 template <class TYPE1, class TYPE2>
-void KeepCommonInMapsSet(map<TYPE1, TYPE2> &mapSubtracted,
-                         const set<TYPE1> &setKept) {
-  // only keep those that is also in the second map
-  map<TYPE1, TYPE2> mapNew;
-  for (typename map<TYPE1, TYPE2>::iterator it = mapSubtracted.begin();
-       it != mapSubtracted.end(); ++it) {
-    //
-    if (setKept.find(it->first) != setKept.end()) {
-      // appear in second map so keep
-      mapNew.insert(
-          typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+void KeepCommonInMapsSet(map<TYPE1, TYPE2> &mapSubtracted, const set<TYPE1> &setKept)
+{
+    // only keep those that is also in the second map
+    map<TYPE1, TYPE2> mapNew;
+    for (typename map<TYPE1, TYPE2>::iterator it = mapSubtracted.begin(); it != mapSubtracted.end(); ++it)
+    {
+        //
+        if (setKept.find(it->first) != setKept.end())
+        {
+            // appear in second map so keep
+            mapNew.insert(typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+        }
     }
-  }
-  mapSubtracted = mapNew;
+    mapSubtracted = mapNew;
 }
 
 template <class TYPE1, class TYPE2>
-void CreateTwoVecFromMap(const map<TYPE1, TYPE2> &mapIn, vector<TYPE1> &vecKey,
-                         vector<TYPE2> &vecval) {
-  vecKey.clear();
-  vecval.clear();
-  for (typename map<TYPE1, TYPE2>::const_iterator it = mapIn.begin();
-       it != mapIn.end(); ++it) {
-    vecKey.push_back(it->first);
-    vecval.push_back(it->second);
-  }
-}
-
-template <class TYPE> TYPE GetSumOfVecElements(const vector<TYPE> &listVals) {
-  TYPE sum = 0;
-  for (int i = 0; i < (int)listVals.size(); ++i) {
-    sum += listVals[i];
-  }
-  return sum;
-}
-
-template <class TYPE>
-double MyCalcStdError(const vector<TYPE> &listVals,
-                      const vector<TYPE> &listValsRef) {
-  YW_ASSERT_INFO(listVals.size() == listValsRef.size(),
-                 "CalcStdError: Size mismatch");
-  double sum = 0.0;
-  if (listValsRef.size() == 0) {
-    return 0.0;
-  }
-  for (int i = 0; i < (int)listVals.size(); ++i) {
-    double diff = listVals[i] - listValsRef[i];
-    sum += diff * diff;
-  }
-  return sqrt(sum / listValsRef.size());
-}
-
-template <class TYPE>
-void GetPositionsOverThres(const vector<TYPE> &listVals, const TYPE &val,
-                           int maxNum, set<int> &listPoses) {
-  // get positions that are either over or at, as long as the total number is
-  // not over
-  listPoses.clear();
-  for (int p = 0; p < (int)listVals.size(); ++p) {
-    //
-    if (val < listVals[p] && (int)listPoses.size() < maxNum) {
-      //
-      listPoses.insert(p);
+void CreateTwoVecFromMap(const map<TYPE1, TYPE2> &mapIn, vector<TYPE1> &vecKey, vector<TYPE2> &vecval)
+{
+    vecKey.clear();
+    vecval.clear();
+    for (typename map<TYPE1, TYPE2>::const_iterator it = mapIn.begin(); it != mapIn.end(); ++it)
+    {
+        vecKey.push_back(it->first);
+        vecval.push_back(it->second);
     }
-  }
-  // now also check for those equal
-  for (int p = 0; p < (int)listVals.size(); ++p) {
-    //
-    if (val == listVals[p] && (int)listPoses.size() < maxNum) {
-      //
-      listPoses.insert(p);
+}
+
+template <class TYPE>
+TYPE GetSumOfVecElements(const vector<TYPE> &listVals)
+{
+    TYPE sum = 0;
+    for (int i = 0; i < (int)listVals.size(); ++i)
+    {
+        sum += listVals[i];
     }
-  }
+    return sum;
 }
 
 template <class TYPE>
-void AddVecTo(vector<TYPE> &vecAdded, const vector<TYPE> &vecAdding) {
-  //
-  YW_ASSERT_INFO(vecAdded.size() == vecAdding.size(), "Size mismatch");
-  for (int i = 0; i < (int)vecAdding.size(); ++i) {
-    vecAdded[i] += vecAdding[i];
-  }
+double MyCalcStdError(const vector<TYPE> &listVals, const vector<TYPE> &listValsRef)
+{
+    YW_ASSERT_INFO(listVals.size() == listValsRef.size(), "CalcStdError: Size mismatch");
+    double sum = 0.0;
+    if (listValsRef.size() == 0)
+    {
+        return 0.0;
+    }
+    for (int i = 0; i < (int)listVals.size(); ++i)
+    {
+        double diff = listVals[i] - listValsRef[i];
+        sum += diff * diff;
+    }
+    return sqrt(sum / listValsRef.size());
 }
 
 template <class TYPE>
-void ConcatVecTo(vector<TYPE> &vecAdded, const vector<TYPE> &vecAdding) {
-  //
-  for (int i = 0; i < (int)vecAdding.size(); ++i) {
-    vecAdded.push_back(vecAdding[i]);
-  }
+void GetPositionsOverThres(const vector<TYPE> &listVals, const TYPE &val, int maxNum, set<int> &listPoses)
+{
+    // get positions that are either over or at, as long as the total number is not over
+    listPoses.clear();
+    for (int p = 0; p < (int)listVals.size(); ++p)
+    {
+        //
+        if (val < listVals[p] && (int)listPoses.size() < maxNum)
+        {
+            //
+            listPoses.insert(p);
+        }
+    }
+    // now also check for those equal
+    for (int p = 0; p < (int)listVals.size(); ++p)
+    {
+        //
+        if (val == listVals[p] && (int)listPoses.size() < maxNum)
+        {
+            //
+            listPoses.insert(p);
+        }
+    }
 }
 
 template <class TYPE>
-int FindMajorityElemVal(const vector<TYPE> &listItems, double valThres) {
-  // find out whether there is an item that is over some percentage of all the
-  // sum (say 50%)
-  // TYPE sum = GetSumOfVecElements(listItems);
-  // YW_ASSERT_INFO(sum > 0, "Can not only have zero");
-  for (int i = 0; i < (int)listItems.size(); ++i) {
-    if ((double)(listItems[i]) > valThres) {
-      return i;
+void AddVecTo(vector<TYPE> &vecAdded, const vector<TYPE> &vecAdding)
+{
+    //
+    YW_ASSERT_INFO(vecAdded.size() == vecAdding.size(), "Size mismatch");
+    for (int i = 0; i < (int)vecAdding.size(); ++i)
+    {
+        vecAdded[i] += vecAdding[i];
     }
-  }
-  // no majority item
-  return -1;
 }
 
 template <class TYPE>
-int FindMajorityElem(const vector<TYPE> &listItems, double fracMaj) {
-  TYPE sum = GetSumOfVecElements(listItems);
-  return FindMajorityElemVal(listItems, fracMaj * sum);
+void ConcatVecTo(vector<TYPE> &vecAdded, const vector<TYPE> &vecAdding)
+{
+    //
+    for (int i = 0; i < (int)vecAdding.size(); ++i)
+    {
+        vecAdded.push_back(vecAdding[i]);
+    }
 }
 
 template <class TYPE>
-void FindMajorityMultiElemVal(const vector<TYPE> &listItems, double valThres,
-                              int maxNum, set<int> &listChosenPos) {
-  // find out whether there is an item that is over some percentage of all the
-  // sum (say 50%)
-  listChosenPos.clear();
-  // TYPE sum = GetSumOfVecElements(listItems);
-  // YW_ASSERT_INFO(sum > 0, "Can not only have zero");
-  vector<TYPE> listItemsSort = listItems;
-  YWSort(listItemsSort);
-  TYPE sumSoFar = 0;
-  int numAdd = 0;
-  int indexPicked = -1;
-  for (int i = (int)listItemsSort.size() - 1; i >= 0; --i) {
-    ++numAdd;
-    if (numAdd > maxNum) {
-      break;
+int FindMajorityElemVal(const vector<TYPE> &listItems, double valThres)
+{
+    // find the first item whose value is over valThres (e.g. 50% of the sum); return its position, or -1 if there is none
+    //TYPE sum = GetSumOfVecElements(listItems);
+    //YW_ASSERT_INFO(sum > 0, "Can not only have zero");
+    for (int i = 0; i < (int)listItems.size(); ++i)
+    {
+        if ((double)(listItems[i]) > valThres)
+        {
+            return i;
+        }
     }
-    sumSoFar += listItemsSort[i];
+    // no majority item
+    return -1;
+}
+
+template <class TYPE>
+int FindMajorityElem(const vector<TYPE> &listItems, double fracMaj)
+{
+    TYPE sum = GetSumOfVecElements(listItems);
+    return FindMajorityElemVal(listItems, fracMaj * sum);
+}
 
-    if ((double)(sumSoFar) > valThres) {
-      indexPicked = i;
-      break;
+template <class TYPE>
+void FindMajorityMultiElemVal(const vector<TYPE> &listItems, double valThres, int maxNum, set<int> &listChosenPos)
+{
+    // pick the largest items (at most maxNum of them) whose combined sum is over valThres; listChosenPos stays empty if no such group exists
+    listChosenPos.clear();
+    //TYPE sum = GetSumOfVecElements(listItems);
+    //YW_ASSERT_INFO(sum > 0, "Can not only have zero");
+    vector<TYPE> listItemsSort = listItems;
+    YWSort(listItemsSort);
+    TYPE sumSoFar = 0;
+    int numAdd = 0;
+    int indexPicked = -1;
+    for (int i = (int)listItemsSort.size() - 1; i >= 0; --i)
+    {
+        ++numAdd;
+        if (numAdd > maxNum)
+        {
+            break;
+        }
+        sumSoFar += listItemsSort[i];
+
+        if ((double)(sumSoFar) > valThres)
+        {
+            indexPicked = i;
+            break;
+        }
     }
-  }
-  // no majority item
-  if (indexPicked < 0) {
-    return;
-  }
-  // find the set of items that is at least that much
-  // get those over first
-  for (int i = 0; i < (int)listItems.size(); ++i) {
-    if (listItems[i] > listItemsSort[indexPicked]) {
-      listChosenPos.insert(i);
+    // no majority item
+    if (indexPicked < 0)
+    {
+        return;
     }
-  }
-  for (int i = 0; i < (int)listItems.size(); ++i) {
-    if (listItems[i] == listItemsSort[indexPicked]) {
-      if ((int)listChosenPos.size() < numAdd) {
-        listChosenPos.insert(i);
-      } else {
-        break;
-      }
+    // find the set of items that is at least that much
+    // get those over first
+    for (int i = 0; i < (int)listItems.size(); ++i)
+    {
+        if (listItems[i] > listItemsSort[indexPicked])
+        {
+            listChosenPos.insert(i);
+        }
+    }
+    for (int i = 0; i < (int)listItems.size(); ++i)
+    {
+        if (listItems[i] == listItemsSort[indexPicked])
+        {
+            if ((int)listChosenPos.size() < numAdd)
+            {
+                listChosenPos.insert(i);
+            }
+            else
+            {
+                break;
+            }
+        }
     }
-  }
 }
 
 template <class TYPE>
-void FindMajorityMultiElem(const vector<TYPE> &listItems, double fracMaj,
-                           int maxNum, set<int> &listChosenPos) {
-  // find out whether there is an item that is over some percentage of all the
-  // sum (say 50%)
-  listChosenPos.clear();
-  TYPE sum = GetSumOfVecElements(listItems);
-  YW_ASSERT_INFO(sum > 0, "Can not only have zero");
-  FindMajorityMultiElemVal(listItems, fracMaj * sum, maxNum, listChosenPos);
+void FindMajorityMultiElem(const vector<TYPE> &listItems, double fracMaj, int maxNum, set<int> &listChosenPos)
+{
+    // same as FindMajorityMultiElemVal, with the threshold given as a fraction (fracMaj) of the sum of all items
+    listChosenPos.clear();
+    TYPE sum = GetSumOfVecElements(listItems);
+    YW_ASSERT_INFO(sum > 0, "Can not only have zero");
+    FindMajorityMultiElemVal(listItems, fracMaj * sum, maxNum, listChosenPos);
 }
 
 template <class TYPE>
-TYPE FindExtremeFreqElem(const multiset<TYPE> &setItems, bool fMin) {
-  // find out the least frequent (fMin=true) or most frequent (fMin=false) from
-  // a list of items
-  std::set<TYPE> my_set(setItems.begin(), setItems.end());
-  vector<int> listOcc;
-  vector<pair<int, TYPE> > mapOcc;
-  for (typename set<TYPE>::iterator it = my_set.begin(); it != my_set.end();
-       ++it) {
+TYPE FindExtremeFreqElem(const multiset<TYPE> &setItems, bool fMin)
+{
+    // find out the least frequent (fMin=true) or most frequent (fMin=false) from a list of items
+    std::set<TYPE> my_set(setItems.begin(), setItems.end());
+    vector<int> listOcc;
+    vector<pair<int, TYPE>> mapOcc;
+    for (typename set<TYPE>::iterator it = my_set.begin(); it != my_set.end(); ++it)
+    {
+        //
+        int count = setItems.count(*it);
+        listOcc.push_back(count);
+        pair<int, TYPE> pp(count, *it);
+        mapOcc.push_back(pp);
+    }
+    std::sort(listOcc.begin(), listOcc.end());
+    int occExt;
+    if (fMin)
+    {
+        occExt = listOcc[0];
+    }
+    else
+    {
+        occExt = listOcc[(int)listOcc.size() - 1];
+    }
+    for (int i = 0; i < (int)mapOcc.size(); ++i)
+    {
+        if (mapOcc[i].first == occExt)
+        {
+            return mapOcc[i].second;
+        }
+    }
+
+    // if failed, just return the first item
+    YW_ASSERT_INFO(false, "Fail");
+    return mapOcc[0].second;
+}
+
+template <class TYPE>
+bool YWSortCmpFunc(TYPE i, TYPE j) { return (i < j); }
+
+template <class TYPE>
+void YWSort(vector<TYPE> &vecIn)
+{
     //
-    int count = setItems.count(*it);
-    listOcc.push_back(count);
-    pair<int, TYPE> pp(count, *it);
-    mapOcc.push_back(pp);
-  }
-  std::sort(listOcc.begin(), listOcc.end());
-  int occExt;
-  if (fMin) {
-    occExt = listOcc[0];
-  } else {
-    occExt = listOcc[(int)listOcc.size() - 1];
-  }
-  for (int i = 0; i < (int)mapOcc.size(); ++i) {
-    if (mapOcc[i].first == occExt) {
-      return mapOcc[i].second;
-    }
-  }
-
-  // if failed, just return the first item
-  YW_ASSERT_INFO(false, "Fail");
-  return mapOcc[0].second;
-}
-
-template <class TYPE> bool YWSortCmpFunc(TYPE i, TYPE j) { return (i < j); }
-
-template <class TYPE> void YWSort(vector<TYPE> &vecIn) {
-  //
-  typedef bool (*comparer_t)(const TYPE, const TYPE);
-  comparer_t cmp = &YWSortCmpFunc;
-  std::sort(vecIn.begin(), vecIn.end(), cmp);
+    typedef bool (*comparer_t)(const TYPE, const TYPE);
+    comparer_t cmp = &YWSortCmpFunc;
+    std::sort(vecIn.begin(), vecIn.end(), cmp);
 }
-
-template <class TYPE> void DumpPair(const pair<TYPE, TYPE> &pp) {
-  cout << "[" << pp.first << "," << pp.second << "]";
-}
-
-template <class TYPE>
-void GetSubsetItem(const vector<TYPE> &listItems, const vector<int> &vecpos,
-                   set<TYPE> &subsetItems) {
-  //
-  subsetItems.clear();
-  for (int i = 0; i < (int)vecpos.size(); ++i) {
-    YW_ASSERT_INFO(vecpos[i] < (int)listItems.size(), "Fail");
-    subsetItems.insert(listItems[vecpos[i]]);
-  }
-}
-
-template <class TYPE>
-void GetSubsetSets(const vector<set<TYPE> > &listItems,
-                   const vector<int> &vecpos, set<TYPE> &subsetItems) {
-  //
-  subsetItems.clear();
-  for (int i = 0; i < (int)vecpos.size(); ++i) {
-    YW_ASSERT_INFO(vecpos[i] < (int)listItems.size(), "Fail");
-    UnionSetsGen(subsetItems, listItems[vecpos[i]]);
-  }
-}
-
-template <class TYPE>
-int FindMaxValPositionFromList(const vector<TYPE> &listItems) {
-  // find out whether there is an item that is over some percentage of all the
-  // sum (say 50%)
-  // TYPE sum = GetSumOfVecElements(listItems);
-  // YW_ASSERT_INFO(sum > 0, "Can not only have zero");
-  YW_ASSERT_INFO(listItems.size() > 0, "Must have at least one");
-  TYPE valMaxCur = listItems[0];
-  int posMaxCur = 0;
-  for (int i = 1; i < (int)listItems.size(); ++i) {
-    if (valMaxCur < listItems[i]) {
-      posMaxCur = i;
-      valMaxCur = listItems[i];
-    }
-  }
-  // no majority item
-  return posMaxCur;
-}
-
-template <class TYPE>
-int FindMaxValPositionFromListGap(const vector<TYPE> &listItems,
-                                  const TYPE &gapMin) {
-  // find out whether there is an item that is over some percentage of all the
-  // sum (say 50%)
-  // TYPE sum = GetSumOfVecElements(listItems);
-  // YW_ASSERT_INFO(sum > 0, "Can not only have zero");
-  YW_ASSERT_INFO(listItems.size() > 0, "Must have at least one");
-  TYPE valMaxCur = listItems[0];
-  int posMaxCur = 0;
-  for (int i = 1; i < (int)listItems.size(); ++i) {
-    if (valMaxCur + gapMin < listItems[i]) {
-      posMaxCur = i;
-      valMaxCur = listItems[i];
-    }
-  }
-  // no majority item
-  return posMaxCur;
-}
-
-template <class TYPE>
-bool IsSetContainerGen(const set<TYPE> &container, const set<TYPE> &contained) {
-  //
-  for (typename set<TYPE>::iterator it = contained.begin();
-       it != contained.end(); ++it) {
-    if (container.find(*it) == container.end()) {
-      return false;
+
+template <class TYPE>
+void DumpPair(const pair<TYPE, TYPE> &pp)
+{
+    cout << "[" << pp.first << "," << pp.second << "]";
+}
+
+template <class TYPE>
+void GetSubsetItem(const vector<TYPE> &listItems, const vector<int> &vecpos, set<TYPE> &subsetItems)
+{
+    //
+    subsetItems.clear();
+    for (int i = 0; i < (int)vecpos.size(); ++i)
+    {
+        YW_ASSERT_INFO(vecpos[i] < (int)listItems.size(), "Fail");
+        subsetItems.insert(listItems[vecpos[i]]);
     }
-  }
-  return true;
 }
 
 template <class TYPE>
-bool FindSmallestContainSetInMapGen(
-    const set<TYPE> &setTest, const map<int, set<set<TYPE> > > &mapAllSets,
-    set<TYPE> &setContainer) {
-  //
-  for (typename map<int, set<set<TYPE> > >::const_iterator it =
-           mapAllSets.begin();
-       it != mapAllSets.end(); ++it) {
-    for (typename set<set<TYPE> >::const_iterator it2 = it->second.begin();
-         it2 != it->second.end(); ++it2) {
-      if (IsSetContainerGen(*it2, setTest) == true) {
-        setContainer = *it2;
-        return true;
-      }
+void GetSubsetSets(const vector<set<TYPE>> &listItems, const vector<int> &vecpos, set<TYPE> &subsetItems)
+{
+    //
+    subsetItems.clear();
+    for (int i = 0; i < (int)vecpos.size(); ++i)
+    {
+        YW_ASSERT_INFO(vecpos[i] < (int)listItems.size(), "Fail");
+        UnionSetsGen(subsetItems, listItems[vecpos[i]]);
     }
-  }
-  return false;
 }
 
 template <class TYPE>
-bool AreSetsIntersecting(const set<TYPE> &s1In, const set<TYPE> &s2In) {
-  //
-  const set<TYPE> *ptrSet1 = &s1In;
-  const set<TYPE> *ptrSet2 = &s2In;
-  if (s1In.size() > s2In.size()) {
-    ptrSet1 = &s2In;
-    ptrSet2 = &s1In;
-  }
-  for (typename set<TYPE>::iterator it = ptrSet1->begin(); it != ptrSet1->end();
-       ++it) {
-    if (ptrSet2->find(*it) != ptrSet2->end()) {
-      return true;
-    }
-  }
-  return false;
+int FindMaxValPositionFromList(const vector<TYPE> &listItems)
+{
+    // find the position of the largest item in the list (the earliest one when there are ties)
+    //TYPE sum = GetSumOfVecElements(listItems);
+    //YW_ASSERT_INFO(sum > 0, "Can not only have zero");
+    YW_ASSERT_INFO(listItems.size() > 0, "Must have at least one");
+    TYPE valMaxCur = listItems[0];
+    int posMaxCur = 0;
+    for (int i = 1; i < (int)listItems.size(); ++i)
+    {
+        if (valMaxCur < listItems[i])
+        {
+            posMaxCur = i;
+            valMaxCur = listItems[i];
+        }
+    }
+    // position of the largest item
+    return posMaxCur;
 }
 
-template <class TYPE> string ConvToString(const TYPE &val) {
-  ostringstream convert; // stream used for the conversion
-  convert << val;        // insert the textual representation of 'Number' in the
-                         // characters in the stream
-  return convert.str();
+template <class TYPE>
+int FindMaxValPositionFromListGap(const vector<TYPE> &listItems, const TYPE &gapMin)
+{
+    // find the position of the largest item, where a later item only replaces the current best if it exceeds it by more than gapMin
+    //TYPE sum = GetSumOfVecElements(listItems);
+    //YW_ASSERT_INFO(sum > 0, "Can not only have zero");
+    YW_ASSERT_INFO(listItems.size() > 0, "Must have at least one");
+    TYPE valMaxCur = listItems[0];
+    int posMaxCur = 0;
+    for (int i = 1; i < (int)listItems.size(); ++i)
+    {
+        if (valMaxCur + gapMin < listItems[i])
+        {
+            posMaxCur = i;
+            valMaxCur = listItems[i];
+        }
+    }
+    // position of the chosen item
+    return posMaxCur;
 }
 
-double StrToDouble(const string &s);
+template <class TYPE>
+bool IsSetContainerGen(const set<TYPE> &container, const set<TYPE> &contained)
+{
+    //
+    for (typename set<TYPE>::iterator it = contained.begin(); it != contained.end(); ++it)
+    {
+        if (container.find(*it) == container.end())
+        {
+            return false;
+        }
+    }
+    return true;
+}
 
 template <class TYPE>
-string ConsNewickTreeFromClades(const set<set<TYPE> > &setClades) {
-  // clade: a collection of taxa (int or string); output newick format
-  // first, the set of taxa is always the outmost clade
-  set<TYPE> setTaxa;
-  map<set<TYPE>, set<TYPE> > mapCladePars;
-  for (typename set<set<TYPE> >::const_iterator it = setClades.begin();
-       it != setClades.end(); ++it) {
-    // find it out the set of taxa
-    for (typename set<TYPE>::iterator itg = it->begin(); itg != it->end();
-         ++itg) {
-      setTaxa.insert(*itg);
-    }
-  }
-  set<set<TYPE> > setCladesUsed = setClades;
-  setCladesUsed.insert(setTaxa);
-  // also ensure single taxon is in
-  for (typename set<TYPE>::iterator it = setTaxa.begin(); it != setTaxa.end();
-       ++it) {
+bool FindSmallestContainSetInMapGen(const set<TYPE> &setTest, const map<int, set<set<TYPE>>> &mapAllSets, set<TYPE> &setContainer)
+{
     //
-    set<TYPE> ss;
-    ss.insert(*it);
-    setCladesUsed.insert(ss);
-  }
-  // order the clades by size (YW: not the best implementation but hope it will
-  // work)
-  map<int, set<set<TYPE> > > mapCladesSz;
-  for (typename set<set<TYPE> >::iterator it = setCladesUsed.begin();
-       it != setCladesUsed.end(); ++it) {
-    if (mapCladesSz.find(it->size()) == mapCladesSz.end()) {
-      set<set<TYPE> > ss;
-      mapCladesSz.insert(
-          typename map<int, set<set<TYPE> > >::value_type(it->size(), ss));
-    }
-    mapCladesSz[it->size()].insert(*it);
-  }
-  // find par of each clade
-  for (typename set<set<TYPE> >::iterator it = setCladesUsed.begin();
-       it != setCladesUsed.end(); ++it) {
+    for (typename map<int, set<set<TYPE>>>::const_iterator it = mapAllSets.begin(); it != mapAllSets.end(); ++it)
+    {
+        for (typename set<set<TYPE>>::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
+        {
+            if (IsSetContainerGen(*it2, setTest) == true)
+            {
+                setContainer = *it2;
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+template <class TYPE>
+bool AreSetsIntersecting(const set<TYPE> &s1In, const set<TYPE> &s2In)
+{
     //
-    for (typename set<set<TYPE> >::iterator it2 = setCladesUsed.begin();
-         it2 != setCladesUsed.end(); ++it2) {
-      //
-      if (it2 != it && IsSetContainerGen(*it2, *it) == true) {
+    const set<TYPE> *ptrSet1 = &s1In;
+    const set<TYPE> *ptrSet2 = &s2In;
+    if (s1In.size() > s2In.size())
+    {
+        ptrSet1 = &s2In;
+        ptrSet2 = &s1In;
+    }
+    for (typename set<TYPE>::iterator it = ptrSet1->begin(); it != ptrSet1->end(); ++it)
+    {
+        if (ptrSet2->find(*it) != ptrSet2->end())
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+template <class TYPE>
+string ConvToString(const TYPE &val)
+{
+    ostringstream convert; // stream used for the conversion
+    convert << val;        // insert the textual representation of 'Number' in the characters in the stream
+    return convert.str();
+}
+
+double StrToDouble(const string &s);
+
+template <class TYPE>
+string ConsNewickTreeFromClades(const set<set<TYPE>> &setClades)
+{
+    // clade: a collection of taxa (int or string); output newick format
+    // first, the set of taxa is always the outmost clade
+    set<TYPE> setTaxa;
+    map<set<TYPE>, set<TYPE>> mapCladePars;
+    for (typename set<set<TYPE>>::const_iterator it = setClades.begin(); it != setClades.end(); ++it)
+    {
+        // collect the taxa of this clade into the overall set of taxa
+        for (typename set<TYPE>::iterator itg = it->begin(); itg != it->end(); ++itg)
+        {
+            setTaxa.insert(*itg);
+        }
+    }
+    set<set<TYPE>> setCladesUsed = setClades;
+    setCladesUsed.insert(setTaxa);
+    // also ensure single taxon is in
+    for (typename set<TYPE>::iterator it = setTaxa.begin(); it != setTaxa.end(); ++it)
+    {
         //
-        if (mapCladePars.find(*it) == mapCladePars.end()) {
-          mapCladePars.insert(
-              typename map<set<TYPE>, set<TYPE> >::value_type(*it, *it2));
-        } else if (mapCladePars[*it].size() > it2->size()) {
-          mapCladePars[*it] = *it2;
+        set<TYPE> ss;
+        ss.insert(*it);
+        setCladesUsed.insert(ss);
+    }
+    // order the clades by size (YW: not the best implementation but hope it will work)
+    map<int, set<set<TYPE>>> mapCladesSz;
+    for (typename set<set<TYPE>>::iterator it = setCladesUsed.begin(); it != setCladesUsed.end(); ++it)
+    {
+        if (mapCladesSz.find(it->size()) == mapCladesSz.end())
+        {
+            set<set<TYPE>> ss;
+            mapCladesSz.insert(typename map<int, set<set<TYPE>>>::value_type(it->size(), ss));
         }
-      }
-    }
-  }
-  // now assign each clade a string
-  map<set<TYPE>, string> mapCladeToStr;
-  queue<set<TYPE> > queueToProc;
-  // init leaves
-  for (typename set<TYPE>::iterator it = setTaxa.begin(); it != setTaxa.end();
-       ++it) {
-    set<TYPE> ss;
-    ss.insert(*it);
-    string strLbl = ConvToString(*it);
-    mapCladeToStr.insert(
-        typename map<set<TYPE>, string>::value_type(ss, strLbl));
-    queueToProc.push(ss);
-  }
-  // now proc from bottom up
-  for (typename map<int, set<set<TYPE> > >::iterator it = mapCladesSz.begin();
-       it != mapCladesSz.end(); ++it) {
-    for (typename set<set<TYPE> >::iterator itg = it->second.begin();
-         itg != it->second.end(); ++itg) {
-      YW_ASSERT_INFO(mapCladeToStr.find(*itg) != mapCladeToStr.end(),
-                     "Fail to find string");
-      // pass it to parent
-      if (mapCladePars.find(*itg) != mapCladePars.end()) {
-        string strBase = mapCladeToStr[*itg];
-        if (itg->size() > 1) {
-          // add parenthsis
-          strBase = "(" + mapCladeToStr[*itg] + ")";
+        mapCladesSz[it->size()].insert(*it);
+    }
+    // find par of each clade
+    for (typename set<set<TYPE>>::iterator it = setCladesUsed.begin(); it != setCladesUsed.end(); ++it)
+    {
+        //
+        for (typename set<set<TYPE>>::iterator it2 = setCladesUsed.begin(); it2 != setCladesUsed.end(); ++it2)
+        {
+            //
+            if (it2 != it && IsSetContainerGen(*it2, *it) == true)
+            {
+                //
+                if (mapCladePars.find(*it) == mapCladePars.end())
+                {
+                    mapCladePars.insert(typename map<set<TYPE>, set<TYPE>>::value_type(*it, *it2));
+                }
+                else if (mapCladePars[*it].size() > it2->size())
+                {
+                    mapCladePars[*it] = *it2;
+                }
+            }
+        }
+    }
+    // now assign each clade a string
+    map<set<TYPE>, string> mapCladeToStr;
+    queue<set<TYPE>> queueToProc;
+    // init leaves
+    for (typename set<TYPE>::iterator it = setTaxa.begin(); it != setTaxa.end(); ++it)
+    {
+        set<TYPE> ss;
+        ss.insert(*it);
+        string strLbl = ConvToString(*it);
+        mapCladeToStr.insert(typename map<set<TYPE>, string>::value_type(ss, strLbl));
+        queueToProc.push(ss);
+    }
+    // now proc from bottom up
+    for (typename map<int, set<set<TYPE>>>::iterator it = mapCladesSz.begin(); it != mapCladesSz.end(); ++it)
+    {
+        for (typename set<set<TYPE>>::iterator itg = it->second.begin(); itg != it->second.end(); ++itg)
+        {
+            YW_ASSERT_INFO(mapCladeToStr.find(*itg) != mapCladeToStr.end(), "Fail to find string");
+            // pass it to parent
+            if (mapCladePars.find(*itg) != mapCladePars.end())
+            {
+                string strBase = mapCladeToStr[*itg];
+                if (itg->size() > 1)
+                {
+                    // add parentheses
+                    strBase = "(" + mapCladeToStr[*itg] + ")";
+                }
+
+                //
+                set<TYPE> sPar = mapCladePars[*itg];
+                if (mapCladeToStr.find(sPar) == mapCladeToStr.end())
+                {
+                    mapCladeToStr.insert(typename map<set<TYPE>, string>::value_type(sPar, strBase));
+                }
+                else
+                {
+                    mapCladeToStr[sPar] = mapCladeToStr[sPar] + "," + strBase;
+                }
+            }
         }
+    }
+    // finally
+    YW_ASSERT_INFO(mapCladeToStr.find(setTaxa) != mapCladeToStr.end(), "Wrong");
+    string res = "(" + mapCladeToStr[setTaxa] + ")";
+    return res;
+}
 
-        //
-        set<TYPE> sPar = mapCladePars[*itg];
-        if (mapCladeToStr.find(sPar) == mapCladeToStr.end()) {
-          mapCladeToStr.insert(
-              typename map<set<TYPE>, string>::value_type(sPar, strBase));
-        } else {
-          mapCladeToStr[sPar] = mapCladeToStr[sPar] + "," + strBase;
+template <class TYPE>
+void FindMaximalSets(set<set<TYPE>> &setsItems)
+{
+    // only keep those with no super set
+    set<set<TYPE>> setsItemsRes;
+    for (typename set<set<TYPE>>::iterator it = setsItems.begin(); it != setsItems.end(); ++it)
+    {
+        bool fSuperSet = false;
+        for (typename set<set<TYPE>>::iterator itg = setsItems.begin(); itg != setsItems.end(); ++itg)
+        {
+            // is itg the super set?
+            if (itg->size() > it->size() && IsSetContainerGen(*itg, *it) == true)
+            {
+                fSuperSet = true;
+                break;
+            }
+        }
+        if (fSuperSet == false)
+        {
+            setsItemsRes.insert(*it);
         }
-      }
     }
-  }
-  // finally
-  YW_ASSERT_INFO(mapCladeToStr.find(setTaxa) != mapCladeToStr.end(), "Wrong");
-  string res = "(" + mapCladeToStr[setTaxa] + ")";
-  return res;
+    setsItems = setsItemsRes;
 }
 
-template <class TYPE> void FindMaximalSets(set<set<TYPE> > &setsItems) {
-  // only keep those with no super set
-  set<set<TYPE> > setsItemsRes;
-  for (typename set<set<TYPE> >::iterator it = setsItems.begin();
-       it != setsItems.end(); ++it) {
-    bool fSuperSet = false;
-    for (typename set<set<TYPE> >::iterator itg = setsItems.begin();
-         itg != setsItems.end(); ++itg) {
-      // is itg the super set?
-      if (itg->size() > it->size() && IsSetContainerGen(*itg, *it) == true) {
-        fSuperSet = true;
-        break;
-      }
+template <class TYPE>
+void InitVecWithVal(vector<TYPE> &listVec, TYPE valInit, int numItems)
+{
+    listVec.clear();
+    for (int i = 0; i < numItems; ++i)
+    {
+        listVec.push_back(valInit);
     }
-    if (fSuperSet == false) {
-      setsItemsRes.insert(*it);
+}
+
+template <class TYPE>
+void PopulateVecBySetGen(vector<TYPE> &vec, const set<TYPE> &sset)
+{
+    //
+    vec.clear();
+    for (typename set<TYPE>::const_iterator it = sset.begin(); it != sset.end(); ++it)
+    {
+        vec.push_back(*it);
     }
-  }
-  setsItems = setsItemsRes;
 }
 
 template <class TYPE>
-void InitVecWithVal(vector<TYPE> &listVec, TYPE valInit, int numItems) {
-  listVec.clear();
-  for (int i = 0; i < numItems; ++i) {
-    listVec.push_back(valInit);
-  }
+void PopulateVecBySetPtrGen(vector<const TYPE *> &vec, const set<TYPE> &sset)
+{
+    //
+    vec.clear();
+    for (typename set<TYPE>::const_iterator it = sset.begin(); it != sset.end(); ++it)
+    {
+        vec.push_back(&(*it));
+    }
+}
+
+template <class TYPE>
+void PopulateSetPtrBySetGen(set<const TYPE *> &sptrs, const set<TYPE> &sset)
+{
+    //
+    sptrs.clear();
+    for (typename set<TYPE>::const_iterator it = sset.begin(); it != sset.end(); ++it)
+    {
+        sptrs.insert(&(*it));
+    }
+}
+
+template <class TYPE>
+void PopulateSetByVecGen(set<TYPE> &sset, const vector<TYPE> &vec)
+{
+    //
+    sset.clear();
+    for (typename vector<TYPE>::const_iterator it = vec.begin(); it != vec.end(); ++it)
+    {
+        sset.insert(*it);
+    }
+}
+
+template <class TYPE>
+void PopulateSetBySetPtrGen(set<TYPE> &sset, const set<const TYPE *> &ssetPtr)
+{
+    // rebuild sset from the items pointed to by ssetPtr; std::set has insert(), not push_back()
+    sset.clear();
+    for (typename set<const TYPE *>::const_iterator it = ssetPtr.begin(); it != ssetPtr.end(); ++it)
+    {
+        sset.insert(*(*it));
+    }
+}
+
+template <class TYPE1, class TYPE2>
+void MergeMapGen(map<TYPE1, TYPE2> &mapCombined, const map<TYPE1, TYPE2> &mapToAdd)
+{
+    for (typename map<TYPE1, TYPE2>::const_iterator it = mapToAdd.begin(); it != mapToAdd.end(); ++it)
+    {
+        mapCombined.insert(typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+    }
 }
 
 template <class TYPE>
-void PopulateVecBySetGen(vector<TYPE> &vec, const set<TYPE> &sset) {
-  //
-  vec.clear();
-  for (typename set<TYPE>::const_iterator it = sset.begin(); it != sset.end();
-       ++it) {
-    vec.push_back(*it);
-  }
+void SplitItemsBySetOfPartition(const set<TYPE> &setItems, const set<set<TYPE>> &setPartitions, vector<set<TYPE>> &vecSplitParts)
+{
+    // setItems: a list of items; setPartitions: a partition of the space of items; vecSplitParts: setItems split into units of those partitions
+    // approach: take the join repeatedly
+    vecSplitParts.clear();
+    set<TYPE> setItemsUse = setItems;
+    while (setItemsUse.size() > 0)
+    {
+        bool fSub = false;
+        for (typename set<set<TYPE>>::iterator it = setPartitions.begin(); it != setPartitions.end(); ++it)
+        {
+            //
+            set<TYPE> setItemSub;
+            JoinSetsGen(*it, setItemsUse, setItemSub);
+            YW_ASSERT_INFO(setItemSub.size() == 0 || setItemSub.size() == it->size(), "Not a partition");
+            if (setItemSub.size() == it->size() && it->size() > 0)
+            {
+                vecSplitParts.push_back(*it);
+                SubtractSetsGen(setItemsUse, *it);
+                fSub = true;
+            }
+        }
+        YW_ASSERT_INFO(fSub == true || setItemsUse.size() == 0, "FATAL ERROR: not progress made in SplitItemsBySetOfPartition");
+    }
 }
 
 template <class TYPE>
-void PopulateVecBySetPtrGen(vector<const TYPE *> &vec, const set<TYPE> &sset) {
-  //
-  vec.clear();
-  for (typename set<TYPE>::const_iterator it = sset.begin(); it != sset.end();
-       ++it) {
-    vec.push_back(&(*it));
-  }
+bool SplitItemsBySetOfPartitionTF(const set<TYPE> &setItems, const set<set<TYPE>> &setPartitions, vector<set<TYPE>> &vecSplitParts)
+{
+    // setItems: a list of items; setPartitions: a partition of the space of items; vecSplitParts: setItems split into units of those partitions
+    // approach: take the join repeatedly
+    vecSplitParts.clear();
+    set<TYPE> setItemsUse = setItems;
+    while (setItemsUse.size() > 0)
+    {
+        bool fSub = false;
+        for (typename set<set<TYPE>>::iterator it = setPartitions.begin(); it != setPartitions.end(); ++it)
+        {
+            //
+            set<TYPE> setItemSub;
+            JoinSetsGen(*it, setItemsUse, setItemSub);
+            if (setItemSub.size() > 0 && setItemSub.size() < it->size())
+            {
+                return false;
+            }
+            if (setItemSub.size() == it->size() && it->size() > 0)
+            {
+                vecSplitParts.push_back(*it);
+                SubtractSetsGen(setItemsUse, *it);
+                fSub = true;
+            }
+        }
+        YW_ASSERT_INFO(fSub == true || setItemsUse.size() == 0, "FATAL ERROR: not progress made in SplitItemsBySetOfPartition");
+    }
+    return true;
 }
 
 template <class TYPE>
-void PopulateSetPtrBySetGen(set<const TYPE *> &sptrs, const set<TYPE> &sset) {
-  //
-  sptrs.clear();
-  for (typename set<TYPE>::const_iterator it = sset.begin(); it != sset.end();
-       ++it) {
-    sptrs.insert(&(*it));
-  }
+void SplitItemsofVecIntoTwoParts(const vector<TYPE> &vecItems, vector<TYPE> &vecFirstPart, vector<TYPE> &vecSecondPart, int posStartof2ndPart)
+{
+    // caution: position is 0 based
+    vecFirstPart.clear();
+    vecSecondPart.clear();
+    for (int i = 0; i < (int)vecItems.size() && i < posStartof2ndPart; ++i)
+    {
+        vecFirstPart.push_back(vecItems[i]);
+    }
+    for (int i = posStartof2ndPart; i < (int)vecItems.size(); ++i)
+    {
+        vecSecondPart.push_back(vecItems[i]);
+    }
 }
 
 template <class TYPE>
-void PopulateSetByVecGen(set<TYPE> &sset, const vector<TYPE> &vec) {
-  //
-  sset.clear();
-  for (typename vector<TYPE>::const_iterator it = vec.begin(); it != vec.end();
-       ++it) {
-    sset.insert(*it);
-  }
+void MergeTwoVectorsInto(vector<TYPE> &vecItems, const vector<TYPE> &vecFirstPart, const vector<TYPE> &vecSecondPart)
+{
+    //
+    vecItems.clear();
+    for (int i = 0; i < (int)vecFirstPart.size(); ++i)
+    {
+        vecItems.push_back(vecFirstPart[i]);
+    }
+    for (int i = 0; i < (int)vecSecondPart.size(); ++i)
+    {
+        vecItems.push_back(vecSecondPart[i]);
+    }
 }
 
 template <class TYPE>
-void PopulateSetBySetPtrGen(set<TYPE> &sset, const set<const TYPE *> &ssetPtr) {
-  //
-  sset.clear();
-  for (typename set<const TYPE *>::const_iterator it = ssetPtr.begin();
-       it != ssetPtr.end(); ++it) {
-    sset.push_back(*(*it));
-  }
+void ScaleVectorValBy(vector<TYPE> &vecItems, const TYPE &factor)
+{
+    for (int i = 0; i < (int)vecItems.size(); ++i)
+    {
+        vecItems[i] *= factor;
+    }
 }
 
-template <class TYPE1, class TYPE2>
-void MergeMapGen(map<TYPE1, TYPE2> &mapCombined,
-                 const map<TYPE1, TYPE2> &mapToAdd) {
-  for (typename map<TYPE1, TYPE2>::const_iterator it = mapToAdd.begin();
-       it != mapToAdd.end(); ++it) {
-    mapCombined.insert(
-        typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
-  }
-}
-
-template <class TYPE>
-void SplitItemsBySetOfPartition(const set<TYPE> &setItems,
-                                const set<set<TYPE> > &setPartitions,
-                                vector<set<TYPE> > &vecSplitParts) {
-  // setItems: a list of items; setpartitions: parition the space of items;
-  // vecSplitParts: split setItems into unit of those partitions approach, take
-  // join repeatitively
-  vecSplitParts.clear();
-  set<TYPE> setItemsUse = setItems;
-  while (setItemsUse.size() > 0) {
-    bool fSub = false;
-    for (typename set<set<TYPE> >::iterator it = setPartitions.begin();
-         it != setPartitions.end(); ++it) {
-      //
-      set<TYPE> setItemSub;
-      JoinSetsGen(*it, setItemsUse, setItemSub);
-      YW_ASSERT_INFO(setItemSub.size() == 0 || setItemSub.size() == it->size(),
-                     "Not a partition");
-      if (setItemSub.size() == it->size() && it->size() > 0) {
-        vecSplitParts.push_back(*it);
-        SubtractSetsGen(setItemsUse, *it);
-        fSub = true;
-      }
-    }
-    YW_ASSERT_INFO(
-        fSub == true || setItemsUse.size() == 0,
-        "FATAL ERROR: not progress made in SplitItemsBySetOfPartition");
-  }
-}
-
-template <class TYPE>
-bool SplitItemsBySetOfPartitionTF(const set<TYPE> &setItems,
-                                  const set<set<TYPE> > &setPartitions,
-                                  vector<set<TYPE> > &vecSplitParts) {
-  // setItems: a list of items; setpartitions: parition the space of items;
-  // vecSplitParts: split setItems into unit of those partitions approach, take
-  // join repeatitively
-  vecSplitParts.clear();
-  set<TYPE> setItemsUse = setItems;
-  while (setItemsUse.size() > 0) {
-    bool fSub = false;
-    for (typename set<set<TYPE> >::iterator it = setPartitions.begin();
-         it != setPartitions.end(); ++it) {
-      //
-      set<TYPE> setItemSub;
-      JoinSetsGen(*it, setItemsUse, setItemSub);
-      if (setItemSub.size() > 0 && setItemSub.size() < it->size()) {
-        return false;
-      }
-      if (setItemSub.size() == it->size() && it->size() > 0) {
-        vecSplitParts.push_back(*it);
-        SubtractSetsGen(setItemsUse, *it);
-        fSub = true;
-      }
-    }
-    YW_ASSERT_INFO(
-        fSub == true || setItemsUse.size() == 0,
-        "FATAL ERROR: not progress made in SplitItemsBySetOfPartition");
-  }
-  return true;
-}
-
-template <class TYPE>
-void SplitItemsofVecIntoTwoParts(const vector<TYPE> &vecItems,
-                                 vector<TYPE> &vecFirstPart,
-                                 vector<TYPE> &vecSecondPart,
-                                 int posStartof2ndPart) {
-  // caution: position is 0 based
-  vecFirstPart.clear();
-  vecSecondPart.clear();
-  for (int i = 0; i < (int)vecItems.size() && i < posStartof2ndPart; ++i) {
-    vecFirstPart.push_back(vecItems[i]);
-  }
-  for (int i = posStartof2ndPart; i < (int)vecItems.size(); ++i) {
-    vecSecondPart.push_back(vecItems[i]);
-  }
-}
-
-template <class TYPE>
-void MergeTwoVectorsInto(vector<TYPE> &vecItems,
-                         const vector<TYPE> &vecFirstPart,
-                         const vector<TYPE> &vecSecondPart) {
-  //
-  vecItems.clear();
-  for (int i = 0; i < (int)vecFirstPart.size(); ++i) {
-    vecItems.push_back(vecFirstPart[i]);
-  }
-  for (int i = 0; i < (int)vecSecondPart.size(); ++i) {
-    vecItems.push_back(vecSecondPart[i]);
-  }
-}
-
-template <class TYPE>
-void ScaleVectorValBy(vector<TYPE> &vecItems, const TYPE &factor) {
-  for (int i = 0; i < (int)vecItems.size(); ++i) {
-    vecItems[i] *= factor;
-  }
-}
+template <class TYPE>
+void OffsetVectorValBy(vector<TYPE> &vecItems, const TYPE &factor)
+{
+    for (int i = 0; i < (int)vecItems.size(); ++i)
+    {
+        vecItems[i] += factor;
+    }
+}
 
-template <class TYPE>
-void OffsetVectorValBy(vector<TYPE> &vecItems, const TYPE &factor) {
-  for (int i = 0; i < (int)vecItems.size(); ++i) {
-    vecItems[i] += factor;
-  }
-}
-
-template <class TYPE>
-void PointwiseMultiVectorBy(vector<TYPE> &vecItems,
-                            const vector<TYPE> &vecItemsFactors) {
-  YW_ASSERT_INFO(vecItems.size() == vecItemsFactors.size(),
-                 "PointwiseMultiVectorBy: size wrong");
-  for (int i = 0; i < (int)vecItems.size(); ++i) {
-    vecItems[i] *= vecItemsFactors[i];
-  }
+template <class TYPE>
+void PointwiseMultiVectorBy(vector<TYPE> &vecItems, const vector<TYPE> &vecItemsFactors)
+{
+    YW_ASSERT_INFO(vecItems.size() == vecItemsFactors.size(), "PointwiseMultiVectorBy: size wrong");
+    for (int i = 0; i < (int)vecItems.size(); ++i)
+    {
+        vecItems[i] *= vecItemsFactors[i];
+    }
 }
 
 template <class TYPE>
-void PointwiseAddVectorBy(vector<TYPE> &vecItemsAdded,
-                          const vector<TYPE> &vecItemsAdding) {
-  YW_ASSERT_INFO(vecItemsAdded.size() == vecItemsAdding.size(),
-                 "PointwiseMultiVectorBy: size wrong");
-  for (int i = 0; i < (int)vecItemsAdded.size(); ++i) {
-    vecItemsAdded[i] += vecItemsAdding[i];
-  }
+void PointwiseAddVectorBy(vector<TYPE> &vecItemsAdded, const vector<TYPE> &vecItemsAdding)
+{
+    YW_ASSERT_INFO(vecItemsAdded.size() == vecItemsAdding.size(), "PointwiseMultiVectorBy: size wrong");
+    for (int i = 0; i < (int)vecItemsAdded.size(); ++i)
+    {
+        vecItemsAdded[i] += vecItemsAdding[i];
+    }
 }
 
 template <class TYPE>
-void CopyVecToArray(const vector<TYPE> &vecItems, TYPE *parray) {
-  // CAUTION: the array must have adequate size to avoid buffer overrun
-  for (int i = 0; i < (int)vecItems.size(); ++i) {
-    parray[i] = vecItems[i];
-  }
+void CopyVecToArray(const vector<TYPE> &vecItems, TYPE *parray)
+{
+    // CAUTION: the array must have adequate size to avoid buffer overrun
+    for (int i = 0; i < (int)vecItems.size(); ++i)
+    {
+        parray[i] = vecItems[i];
+    }
 }
 
 template <class TYPE>
-void CopyArrayToVec(TYPE *parray, int sz, vector<TYPE> &vecItems) {
-  // CAUTION: the array must have adequate size to avoid buffer overrun
-  vecItems.clear();
-  for (int i = 0; i < sz; ++i) {
-    vecItems.push_back(parray[i]);
-  }
+void CopyArrayToVec(TYPE *parray, int sz, vector<TYPE> &vecItems)
+{
+    // CAUTION: the array must have adequate size to avoid buffer overrun
+    vecItems.clear();
+    for (int i = 0; i < sz; ++i)
+    {
+        vecItems.push_back(parray[i]);
+    }
 }
 
 template <class TYPE>
-void SwapItemsInVec(vector<TYPE> &vecItems, int pos1, int pos2) {
-  YW_ASSERT_INFO(pos1 < (int)vecItems.size() && pos2 < (int)vecItems.size(),
-                 "Overflow");
-  TYPE tmp = vecItems[pos1];
-  vecItems[pos1] = vecItems[pos2];
-  vecItems[pos2] = tmp;
+void SwapItemsInVec(vector<TYPE> &vecItems, int pos1, int pos2)
+{
+    YW_ASSERT_INFO(pos1 < (int)vecItems.size() && pos2 < (int)vecItems.size(), "Overflow");
+    TYPE tmp = vecItems[pos1];
+    vecItems[pos1] = vecItems[pos2];
+    vecItems[pos2] = tmp;
 }
 
-template <class TYPE> void SwapPairGen(pair<TYPE, TYPE> &pp) {
-  TYPE t = pp.first;
-  pp.first = pp.second;
-  pp.second = t;
+template <class TYPE>
+void SwapPairGen(pair<TYPE, TYPE> &pp)
+{
+    TYPE t = pp.first;
+    pp.first = pp.second;
+    pp.second = t;
 }
 
 template <class TYPE>
-int GetClosestTo(const vector<TYPE> &listNums, TYPE &target) {
-  int pos = -1;
-  TYPE absDistMin = HAP_MAX_INT * 1.0;
-  for (int i = 0; i < (int)listNums.size(); ++i) {
-    TYPE dist1 = listNums[i] - target;
-    TYPE dist2 = target - listNums[i];
-    if (dist1 >= 0 && dist1 < absDistMin) {
-      absDistMin = dist1;
-      pos = i;
-    } else if (dist2 >= 0 && dist2 < absDistMin) {
-      absDistMin = dist2;
-      pos = i;
+int GetClosestTo(const vector<TYPE> &listNums, TYPE &target)
+{
+    int pos = -1;
+    TYPE absDistMin = HAP_MAX_INT * 1.0;
+    for (int i = 0; i < (int)listNums.size(); ++i)
+    {
+        TYPE dist1 = listNums[i] - target;
+        TYPE dist2 = target - listNums[i];
+        if (dist1 >= 0 && dist1 < absDistMin)
+        {
+            absDistMin = dist1;
+            pos = i;
+        }
+        else if (dist2 >= 0 && dist2 < absDistMin)
+        {
+            absDistMin = dist2;
+            pos = i;
+        }
     }
-  }
 
-  return pos;
+    return pos;
 }
 
 #if 0
@@ -776,586 +844,633 @@ cout << "Row " << i << " is done\n";
 
 //#if 0
 template <class TYPE>
-void ReduceContainerSetsForSetsGen(vector<set<TYPE> > &listSets) {
-  // give a list of sets, if one set A contains another set B, then remove
-  // the intersection between them from A (not B)
-  // if there is non-empty intersection but neither contains one another, DO
-  // NOTHING! note: there may be multiple ways for doing this; fornow, this
-  // procedure just finds a legal solution
-  vector<set<TYPE> > listSetsNext; // we ensure there is no container sets here
-  // process each input set, if it contains any set in the new list, reduces it
-  // and add to the ist if contained, reduce the one already in teh list (which
-  // still introduce no new container in the old list)
-  for (int i = 0; i < (int)listSets.size(); ++i) {
-    //
-    set<int> setToAdd = listSets[i];
-    // loop until no more container is found
-    bool fCont = true;
-    while (fCont == true) {
-      fCont = false;
-      for (int j = 0; j < (int)listSetsNext.size(); ++j) {
-        // test whether the new set contains any of
-        set<TYPE> setInt;
-        JoinSetsGen(setToAdd, listSetsNext[j], setInt);
-        if (setInt.size() == listSetsNext[j].size()) {
-          // reduce the one to add
-          SubtractSetsGen(setToAdd, setInt);
-          fCont = true; // since we updated the one to add (so maybe new
-                        // containment emerage), need to continue looping
-        } else {
-          if (setInt.size() == setToAdd.size()) {
-            SubtractSetsGen(listSetsNext[j], setInt);
-          }
+void ReduceContainerSetsForSetsGen(vector<set<TYPE>> &listSets)
+{
+    // give a list of sets, if one set A contains another set B, then remove
+    // the intersection between them from A (not B)
+    // if there is non-empty intersection but neither contains one another, DO NOTHING!
+    // note: there may be multiple ways for doing this; fornow, this procedure just finds a legal solution
+    vector<set<TYPE>> listSetsNext; // we ensure there is no container sets here
+    // process each input set, if it contains any set in the new list, reduces it and add to the ist
+    // if contained, reduce the one already in teh list (which still introduce no new container in the old list)
+    for (int i = 0; i < (int)listSets.size(); ++i)
+    {
+        //
+        set<int> setToAdd = listSets[i];
+        // loop until no more container is found
+        bool fCont = true;
+        while (fCont == true)
+        {
+            fCont = false;
+            for (int j = 0; j < (int)listSetsNext.size(); ++j)
+            {
+                // test whether the new set contains any of
+                set<TYPE> setInt;
+                JoinSetsGen(setToAdd, listSetsNext[j], setInt);
+                if (setInt.size() == listSetsNext[j].size())
+                {
+                    // reduce the one to add
+                    SubtractSetsGen(setToAdd, setInt);
+                    fCont = true; // since we updated the one to add (so maybe new containment emerage), need to continue looping
+                }
+                else
+                {
+                    if (setInt.size() == setToAdd.size())
+                    {
+                        SubtractSetsGen(listSetsNext[j], setInt);
+                    }
+                }
+            }
         }
-      }
-    }
-    // cout << "Adding a new set to next set:";
-    // DumpIntSet(setToAdd);
-    // add it
-    listSetsNext.push_back(setToAdd);
-  }
-  // this is the updated sets that contains no containers
-  listSets = listSetsNext;
-  // cout << "Resulting sets: ";
-  // for(int i=0; i<(int)listSets.size(); ++i)
-  //{
-  // DumpIntSet(listSets[i]);
-  //}
+        //cout << "Adding a new set to next set:";
+        //DumpIntSet(setToAdd);
+        // add it
+        listSetsNext.push_back(setToAdd);
+    }
+    // this is the updated sets that contains no containers
+    listSets = listSetsNext;
+    //cout << "Resulting sets: ";
+    //for(int i=0; i<(int)listSets.size(); ++i)
+    //{
+    //DumpIntSet(listSets[i]);
+    //}
 }
 //#endif
 
 template <class TYPE>
-void RemoveVecElementAt(vector<TYPE> &listItems, int pos) {
-  // remove the item at the pos
-  if (pos < (int)listItems.size()) {
-    listItems.erase(listItems.begin() + pos);
-  }
-}
-
-template <class TYPE>
-void AppendItemToBoundedVec(const TYPE &item, vector<TYPE> &listItem,
-                            int posvecToAdd, int maxSize) {
-  // add an item to the position in a vector
-  // if max capacity is reached, then drop the last one
-  YW_ASSERT_INFO(posvecToAdd <= (int)listItem.size(), "Position: wrong");
-  if ((int)listItem.size() == maxSize && posvecToAdd == (int)listItem.size()) {
-    // no room for it
-    return;
-  } else {
-    // create a new list
-    vector<TYPE> listItemNew;
-    int pos = 0;
-    for (; pos < posvecToAdd; ++pos) {
-      listItemNew.push_back(listItem[pos]);
-    }
-    // add this item
-    listItemNew.push_back(item);
-    // add the rest if needed
-    for (; pos < (int)listItem.size(); ++pos) {
-      if ((int)listItemNew.size() >= maxSize) {
-        // overflow, stop
-        break;
-      } else {
-        listItemNew.push_back(listItem[pos]);
-      }
-    }
-    listItem = listItemNew;
-  }
+void RemoveVecElementAt(vector<TYPE> &listItems, int pos)
+{
+    // remove the item at the pos
+    if (pos < (int)listItems.size())
+    {
+        listItems.erase(listItems.begin() + pos);
+    }
+}
+
+template <class TYPE>
+void AppendItemToBoundedVec(const TYPE &item, vector<TYPE> &listItem, int posvecToAdd, int maxSize)
+{
+    // add an item to the position in a vector
+    // if max capacity is reached, then drop the last one
+    YW_ASSERT_INFO(posvecToAdd <= (int)listItem.size(), "Position: wrong");
+    if ((int)listItem.size() == maxSize && posvecToAdd == (int)listItem.size())
+    {
+        // no room for it
+        return;
+    }
+    else
+    {
+        // create a new list
+        vector<TYPE> listItemNew;
+        int pos = 0;
+        for (; pos < posvecToAdd; ++pos)
+        {
+            listItemNew.push_back(listItem[pos]);
+        }
+        // add this item
+        listItemNew.push_back(item);
+        // add the rest if needed
+        for (; pos < (int)listItem.size(); ++pos)
+        {
+            if ((int)listItemNew.size() >= maxSize)
+            {
+                // overflow, stop
+                break;
+            }
+            else
+            {
+                listItemNew.push_back(listItem[pos]);
+            }
+        }
+        listItem = listItemNew;
+    }
 }
 
 // create a combined list by merging items (and then take average)
 template <class TYPE>
-void PutItemsInBuckets(int numBuckets, const vector<TYPE> &listItemsIn,
-                       vector<TYPE> &itemsInBuckets) {
-  // if list is empty, then dont do it
-  if (listItemsIn.size() > 0) {
-    // here buckets contains the average items in the original list
-    int stepNum = listItemsIn.size() / numBuckets;
-    if (stepNum * numBuckets < (int)listItemsIn.size()) {
-      stepNum += 1;
-    }
-    int pos = 0;
-    for (int i = 0; i < numBuckets; ++i) {
-      //
-      bool fStop = false;
-      TYPE tot = 0;
-      for (int j = 0; j < stepNum; ++j) {
-        if (pos >= (int)listItemsIn.size()) {
-          fStop = true;
-          break;
+void PutItemsInBuckets(int numBuckets, const vector<TYPE> &listItemsIn, vector<TYPE> &itemsInBuckets)
+{
+    // if list is empty, then dont do it
+    if (listItemsIn.size() > 0)
+    {
+        // here buckets contains the average items in the original list
+        int stepNum = listItemsIn.size() / numBuckets;
+        if (stepNum * numBuckets < (int)listItemsIn.size())
+        {
+            stepNum += 1;
         }
-        tot += listItemsIn[pos];
-        ++pos;
-      }
-      if (fStop == false) {
-        itemsInBuckets.push_back(tot / stepNum);
-      }
-    }
-  }
-  // fill in 0 if otherwise
-  while ((int)itemsInBuckets.size() < numBuckets) {
-    itemsInBuckets.push_back(0);
-  }
-}
-
-template <class TYPE> void ReverseVec(vector<TYPE> &vec) {
-  // cout << "Before switching: vec = ";
-  // DumpIntVec( vec );
-  // This function would reverse the integer vector, i.e. vec[0] = vec[n-1] and
-  // so on
-  for (int i = 0; i < (int)vec.size() / 2; ++i) {
-    TYPE tmp = vec[(int)vec.size() - 1 - i];
-    vec[(int)vec.size() - 1 - i] = vec[i];
-    vec[i] = tmp;
-  }
-  // cout << "After switching: vec = ";
-  // DumpIntVec( vec );
+        int pos = 0;
+        for (int i = 0; i < numBuckets; ++i)
+        {
+            //
+            bool fStop = false;
+            TYPE tot = 0;
+            for (int j = 0; j < stepNum; ++j)
+            {
+                if (pos >= (int)listItemsIn.size())
+                {
+                    fStop = true;
+                    break;
+                }
+                tot += listItemsIn[pos];
+                ++pos;
+            }
+            if (fStop == false)
+            {
+                itemsInBuckets.push_back(tot / stepNum);
+            }
+        }
+    }
+    // fill in 0 if otherwise
+    while ((int)itemsInBuckets.size() < numBuckets)
+    {
+        itemsInBuckets.push_back(0);
+    }
+}
+
+template <class TYPE>
+void ReverseVec(vector<TYPE> &vec)
+{
+    //cout << "Before switching: vec = ";
+    //DumpIntVec( vec );
+    // This function would reverse the integer vector, i.e. vec[0] = vec[n-1] and so on
+    for (int i = 0; i < (int)vec.size() / 2; ++i)
+    {
+        TYPE tmp = vec[(int)vec.size() - 1 - i];
+        vec[(int)vec.size() - 1 - i] = vec[i];
+        vec[i] = tmp;
+    }
+    //cout << "After switching: vec = ";
+    //DumpIntVec( vec );
 }
 
 // extract 1D array from 2D array
 template <class TYPE>
-void ExtractColFrom2DArray(const vector<vector<TYPE> > &array2D, int col,
-                           vector<TYPE> &vecCol) {
-  vecCol.clear();
-  YW_ASSERT_INFO(array2D.size() == 0 || col < (int)array2D[0].size(),
-                 "Overflow");
-  for (int i = 0; i < (int)array2D.size(); ++i) {
-    vecCol.push_back(array2D[i][col]);
-  }
+void ExtractColFrom2DArray(const vector<vector<TYPE>> &array2D, int col, vector<TYPE> &vecCol)
+{
+    vecCol.clear();
+    YW_ASSERT_INFO(array2D.size() == 0 || col < (int)array2D[0].size(), "Overflow");
+    for (int i = 0; i < (int)array2D.size(); ++i)
+    {
+        vecCol.push_back(array2D[i][col]);
+    }
 }
 
 // calc mean and variance
 template <class TYPE>
-void CalcMeanVarianceFor(const vector<TYPE> &listVals, double &valMean,
-                         double &valVar) {
-  YW_ASSERT_INFO(listVals.size() > 0, "Empty input");
+void CalcMeanVarianceFor(const vector<TYPE> &listVals, double &valMean, double &valVar)
+{
+    YW_ASSERT_INFO(listVals.size() > 0, "Empty input");
 
-  //
-  double valSum = 0.0;
-  for (int i = 0; i < (int)listVals.size(); ++i) {
-    valSum += (double)listVals[i];
-  }
-  valMean = valSum / listVals.size();
-  valVar = 0.0;
-  for (int i = 0; i < (int)listVals.size(); ++i) {
-    double vdiff = listVals[i] - valMean;
-    valVar += vdiff * vdiff;
-  }
+    //
+    double valSum = 0.0;
+    for (int i = 0; i < (int)listVals.size(); ++i)
+    {
+        valSum += (double)listVals[i];
+    }
+    valMean = valSum / listVals.size();
+    valVar = 0.0;
+    for (int i = 0; i < (int)listVals.size(); ++i)
+    {
+        double vdiff = listVals[i] - valMean;
+        valVar += vdiff * vdiff;
+    }
 }
 
 template <class TYPE1, class TYPE2>
-void FindMinFromPairedListGen(const vector<pair<TYPE1, TYPE2> > &vecListInput,
-                              vector<pair<TYPE1, TYPE2> > &listMinItems) {
-  // TYPE1: value (key), TYPE2: can be anything (maybe a pointer for example)
-  // there may be multiple items with value (type1) are minimum; listMinItems:
-  // contain all such items
-  listMinItems.clear();
-  if (vecListInput.size() == 0) {
-    return;
-  }
-  TYPE1 valMin = vecListInput[0].first;
-  listMinItems.push_back(vecListInput[0]);
-  for (int i = 1; i < (int)vecListInput.size(); ++i) {
-    //
-    if (vecListInput[i].first < valMin) {
-      valMin = vecListInput[i].first;
-      listMinItems.clear();
-      listMinItems.push_back(vecListInput[i]);
-    } else if (vecListInput[i].first == valMin) {
-      listMinItems.push_back(vecListInput[i]);
-    }
-  }
-}
-
-template <class TYPE>
-void FindRangeInSortedVector(const vector<TYPE> &listSortVals,
-                             const TYPE &valLB, const TYPE &valUB, int &posLB,
-                             int &posUB) {
-  // given a sorted list, and a range [lb,ub]; want to find the range in the
-  // list that contain the list if there is no such range, set as -1
-  posLB = 0;
-  posUB = (int)listSortVals.size() - 1;
-  while (listSortVals[posLB] < valLB) {
-    ++posLB;
-  }
-  while (listSortVals[posUB] > valUB) {
-    --posUB;
-  }
-  if (posLB > posUB) {
-    posLB = -1;
-    posUB = -1;
-  }
-}
-
-template <class TYPE> void DumpVecWithSpace(const vector<TYPE> &listItems) {
-  // remove the item at the pos
-  for (int i = 0; i < (int)listItems.size(); ++i) {
-    cout << listItems[i];
-    if (i < (int)listItems.size() - 1) {
-      cout << "  ";
-    }
-  }
+void FindMinFromPairedListGen(const vector<pair<TYPE1, TYPE2>> &vecListInput, vector<pair<TYPE1, TYPE2>> &listMinItems)
+{
+    // TYPE1: value (key), TYPE2: can be anything (maybe a pointer for example)
+    // there may be multiple items with value (type1) are minimum; listMinItems: contain all such items
+    listMinItems.clear();
+    if (vecListInput.size() == 0)
+    {
+        return;
+    }
+    TYPE1 valMin = vecListInput[0].first;
+    listMinItems.push_back(vecListInput[0]);
+    for (int i = 1; i < (int)vecListInput.size(); ++i)
+    {
+        //
+        if (vecListInput[i].first < valMin)
+        {
+            valMin = vecListInput[i].first;
+            listMinItems.clear();
+            listMinItems.push_back(vecListInput[i]);
+        }
+        else if (vecListInput[i].first == valMin)
+        {
+            listMinItems.push_back(vecListInput[i]);
+        }
+    }
+}
+
+template <class TYPE>
+void FindRangeInSortedVector(const vector<TYPE> &listSortVals, const TYPE &valLB, const TYPE &valUB, int &posLB, int &posUB)
+{
+    // given a sorted list, and a range [lb,ub]; want to find the range in the list that contain the list
+    // if there is no such range, set as -1
+    posLB = 0;
+    posUB = (int)listSortVals.size() - 1;
+    while (listSortVals[posLB] < valLB)
+    {
+        ++posLB;
+    }
+    while (listSortVals[posUB] > valUB)
+    {
+        --posUB;
+    }
+    if (posLB > posUB)
+    {
+        posLB = -1;
+        posUB = -1;
+    }
+}
+
+template <class TYPE>
+void DumpVecWithSpace(const vector<TYPE> &listItems)
+{
+    // remove the item at the pos
+    for (int i = 0; i < (int)listItems.size(); ++i)
+    {
+        cout << listItems[i];
+        if (i < (int)listItems.size() - 1)
+        {
+            cout << "  ";
+        }
+    }
 }
 
 template <class TYPE1, class TYPE2>
-void AddingMaps(map<TYPE1, TYPE2> &mapUnion,
-                const map<TYPE1, TYPE2> &mapToUnion) {
-  // append two maps; for duplicates (i.e. in both maps), perform a adding
-  for (typename map<TYPE1, TYPE2>::const_iterator it = mapToUnion.begin();
-       it != mapToUnion.end(); ++it) {
-    //
-    if (mapUnion.find(it->first) != mapUnion.end()) {
-      // add it in
-      mapUnion[it->first] += it->second;
-    } else {
-      //
-      mapUnion.insert(
-          typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+void AddingMaps(map<TYPE1, TYPE2> &mapUnion, const map<TYPE1, TYPE2> &mapToUnion)
+{
+    // append two maps; for duplicates (i.e. in both maps), perform a adding
+    for (typename map<TYPE1, TYPE2>::const_iterator it = mapToUnion.begin(); it != mapToUnion.end(); ++it)
+    {
+        //
+        if (mapUnion.find(it->first) != mapUnion.end())
+        {
+            // add it in
+            mapUnion[it->first] += it->second;
+        }
+        else
+        {
+            //
+            mapUnion.insert(typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+        }
     }
-  }
 }
 
 template <class TYPE1, class TYPE2>
-void MaxMaps(map<TYPE1, TYPE2> &mapMax, const map<TYPE1, TYPE2> &mapCmp,
-             bool fMax) {
-  // taking the maximum value of the two maps; if fMax = false, taking the min
-  for (typename map<TYPE1, TYPE2>::const_iterator it = mapCmp.begin();
-       it != mapCmp.end(); ++it) {
-    //
-    if (mapMax.find(it->first) != mapMax.end()) {
-      //
-      if ((mapMax[it->first] < it->second && fMax) ||
-          (mapMax[it->first] > it->second && fMax == false)) {
-        mapMax[it->first] = it->second;
-      }
-    } else {
-      //
-      mapMax.insert(
-          typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+void MaxMaps(map<TYPE1, TYPE2> &mapMax, const map<TYPE1, TYPE2> &mapCmp, bool fMax)
+{
+    // taking the maximum value of the two maps; if fMax = false, taking the min
+    for (typename map<TYPE1, TYPE2>::const_iterator it = mapCmp.begin(); it != mapCmp.end(); ++it)
+    {
+        //
+        if (mapMax.find(it->first) != mapMax.end())
+        {
+            //
+            if ((mapMax[it->first] < it->second && fMax) || (mapMax[it->first] > it->second && fMax == false))
+            {
+                mapMax[it->first] = it->second;
+            }
+        }
+        else
+        {
+            //
+            mapMax.insert(typename map<TYPE1, TYPE2>::value_type(it->first, it->second));
+        }
     }
-  }
 }
 
 template <class TYPE1, class TYPE2>
-void MapIntSetTo(const set<TYPE1> &sint1,
-                 const map<TYPE1, TYPE2> &mapOneToOther, set<TYPE2> &sres) {
-  // map items in sint1 to sres; CAUTION: duplicates may be lost
-  sres.clear();
-  for (typename set<TYPE1>::iterator it = sint1.begin(); it != sint1.end();
-       ++it) {
-    typename map<TYPE1, TYPE2>::const_iterator it2 = mapOneToOther.find(*it);
-    if (it2 != mapOneToOther.end()) {
-      sres.insert(it2->second);
-    } else {
-      // something very wrong
-      YW_ASSERT_INFO(false, "Mapping failed");
-    }
-  }
+void MapIntSetTo(const set<TYPE1> &sint1, const map<TYPE1, TYPE2> &mapOneToOther, set<TYPE2> &sres)
+{
+    // map items in sint1 to sres; CAUTION: duplicates may be lost
+    sres.clear();
+    for (typename set<TYPE1>::iterator it = sint1.begin(); it != sint1.end(); ++it)
+    {
+        typename map<TYPE1, TYPE2>::const_iterator it2 = mapOneToOther.find(*it);
+        if (it2 != mapOneToOther.end())
+        {
+            sres.insert(it2->second);
+        }
+        else
+        {
+            // something very wrong
+            YW_ASSERT_INFO(false, "Mapping failed");
+        }
+    }
 }
 
 template <class TYPE1, class TYPE2>
-void MapVecToGen(const vector<TYPE1> &sint1,
-                 const map<TYPE1, TYPE2> &mapOneToOther, vector<TYPE2> &sres) {
-  // map items in sint1 to sres;
-  // YW: if some items cannot find a record in map, store the original item
-  sres.clear();
-  for (typename vector<TYPE1>::iterator it = sint1.begin(); it != sint1.end();
-       ++it) {
-    typename map<TYPE1, TYPE2>::const_iterator it2 = mapOneToOther.find(*it);
-    if (it2 != mapOneToOther.end()) {
-      sres.push_back(it2->second);
-    } else {
-      // something very wrong
-      sres.push_back(*it);
-    }
-  }
+void MapVecToGen(const vector<TYPE1> &sint1, const map<TYPE1, TYPE2> &mapOneToOther, vector<TYPE2> &sres)
+{
+    // map items in sint1 to sres;
+    // YW: if some items cannot find a record in map, store the original item
+    sres.clear();
+    for (typename vector<TYPE1>::iterator it = sint1.begin(); it != sint1.end(); ++it)
+    {
+        typename map<TYPE1, TYPE2>::const_iterator it2 = mapOneToOther.find(*it);
+        if (it2 != mapOneToOther.end())
+        {
+            sres.push_back(it2->second);
+        }
+        else
+        {
+            // something very wrong
+            sres.push_back(*it);
+        }
+    }
 }
 
 template <class TYPE1, class TYPE2>
-void InverseMap(const map<TYPE1, TYPE2> &map1, map<TYPE2, TYPE1> &mapInv) {
-  // append two maps; for duplicates (i.e. in both maps), perform a adding
-  for (typename map<TYPE1, TYPE2>::const_iterator it = map1.begin();
-       it != map1.end(); ++it) {
-    mapInv.insert(
-        typename map<TYPE2, TYPE1>::value_type(it->second, it->first));
-  }
+void InverseMap(const map<TYPE1, TYPE2> &map1, map<TYPE2, TYPE1> &mapInv)
+{
+    // append two maps; for duplicates (i.e. in both maps), perform a adding
+    for (typename map<TYPE1, TYPE2>::const_iterator it = map1.begin(); it != map1.end(); ++it)
+    {
+        mapInv.insert(typename map<TYPE2, TYPE1>::value_type(it->second, it->first));
+    }
 }
 
 template <class TYPE>
-int GetItemIndexInVecGen(const vector<TYPE> &vec, TYPE &item) {
-  //
-  for (unsigned int i = 0; i < vec.size(); ++i) {
-    if (vec[i] == item) {
-      return (int)i;
+int GetItemIndexInVecGen(const vector<TYPE> &vec, TYPE &item)
+{
+    //
+    for (unsigned int i = 0; i < vec.size(); ++i)
+    {
+        if (vec[i] == item)
+        {
+            return (int)i;
+        }
     }
-  }
-  return -1;
+    return -1;
 }
 
 // if vec1 smaller than vec2 pointwise
 template <class TYPE>
-bool IsVecSmallerThan(const vector<TYPE> &vec1, const vector<TYPE> &vec2) {
-  //
-  YW_ASSERT_INFO(vec1.size() == vec2.size(), "Size: mismatch");
-  for (unsigned int i = 0; i < vec1.size(); ++i) {
-    if (vec1[i] >= vec2[i]) {
-      return false;
+bool IsVecSmallerThan(const vector<TYPE> &vec1, const vector<TYPE> &vec2)
+{
+    //
+    YW_ASSERT_INFO(vec1.size() == vec2.size(), "Size: mismatch");
+    for (unsigned int i = 0; i < vec1.size(); ++i)
+    {
+        if (vec1[i] >= vec2[i])
+        {
+            return false;
+        }
     }
-  }
-  return true;
+    return true;
 }
 
 // calc Jaccard index for two sets
 template <class TYPE>
-double CalcJaccrdIndexForTwoSets(const set<TYPE> &s1, const set<TYPE> &s2) {
-  //
-  set<TYPE> sunion = s1;
-  UnionSetsGen(sunion, s2);
-  set<TYPE> sjoin;
-  JoinSetsGen(s1, s2, sjoin);
-  return ((double)sjoin.size()) / sunion.size();
+double CalcJaccrdIndexForTwoSets(const set<TYPE> &s1, const set<TYPE> &s2)
+{
+    //
+    set<TYPE> sunion = s1;
+    UnionSetsGen(sunion, s2);
+    set<TYPE> sjoin;
+    JoinSetsGen(s1, s2, sjoin);
+    return ((double)sjoin.size()) / sunion.size();
 }
 
 // find the best matched set
 template <class TYPE>
-double GetBestJaccrdMatchedSetIn(const set<TYPE> &s1,
-                                 const set<set<TYPE> > &listSet2,
-                                 set<TYPE> &bestMatch) {
-  // return negative if no match found
-  double score = -1.0;
-  for (typename set<set<TYPE> >::const_iterator it = listSet2.begin();
-       it != listSet2.end(); ++it) {
-    double scoreStep = CalcJaccrdIndexForTwoSets(s1, *it);
-    if (scoreStep > score) {
-      score = scoreStep;
-      bestMatch = *it;
+double GetBestJaccrdMatchedSetIn(const set<TYPE> &s1, const set<set<TYPE>> &listSet2, set<TYPE> &bestMatch)
+{
+    // return negative if no match found
+    double score = -1.0;
+    for (typename set<set<TYPE>>::const_iterator it = listSet2.begin(); it != listSet2.end(); ++it)
+    {
+        double scoreStep = CalcJaccrdIndexForTwoSets(s1, *it);
+        if (scoreStep > score)
+        {
+            score = scoreStep;
+            bestMatch = *it;
+        }
     }
-  }
-  return score;
+    return score;
 }
 
 // find the leftmost common item of the two lists
 template <class TYPE>
-bool FindLeftmostCommonItem(const vector<TYPE> &vec1, const vector<TYPE> &vec2,
-                            TYPE &res) {
-  // for now do a simple test;
-  for (int i = 0; i < (int)vec1.size(); ++i) {
-    for (int j = 0; j < (int)vec2.size(); ++j) {
-      if (vec1[i] == vec2[j]) {
-        res = vec1[i];
-        return true;
-      }
+bool FindLeftmostCommonItem(const vector<TYPE> &vec1, const vector<TYPE> &vec2, TYPE &res)
+{
+    // for now do a simple test;
+    for (int i = 0; i < (int)vec1.size(); ++i)
+    {
+        for (int j = 0; j < (int)vec2.size(); ++j)
+        {
+            if (vec1[i] == vec2[j])
+            {
+                res = vec1[i];
+                return true;
+            }
+        }
     }
-  }
-  return false;
+    return false;
 }
 
 // find different items in two sets
 template <class TYPE>
-void FindDiffOfTwoSets(const set<TYPE> &setItems1, const set<TYPE> &setItems2,
-                       set<TYPE> &set1Only, set<TYPE> &set2Only) {
-  // find items that are in set 1 and 2 only
-  set1Only.clear();
-  set2Only.clear();
-  for (typename set<TYPE>::const_iterator it = setItems1.begin();
-       it != setItems1.end(); ++it) {
-    if (setItems2.find(*it) == setItems2.end()) {
-      set1Only.insert(*it);
+void FindDiffOfTwoSets(const set<TYPE> &setItems1, const set<TYPE> &setItems2, set<TYPE> &set1Only, set<TYPE> &set2Only)
+{
+    // find items that are only in set 1 (set1Only) and items that are only in set 2 (set2Only)
+    set1Only.clear();
+    set2Only.clear();
+    for (typename set<TYPE>::const_iterator it = setItems1.begin(); it != setItems1.end(); ++it)
+    {
+        if (setItems2.find(*it) == setItems2.end())
+        {
+            set1Only.insert(*it);
+        }
     }
-  }
-  for (typename set<TYPE>::const_iterator it = setItems2.begin();
-       it != setItems2.end(); ++it) {
-    if (setItems1.find(*it) == setItems1.end()) {
-      set2Only.insert(*it);
+    for (typename set<TYPE>::const_iterator it = setItems2.begin(); it != setItems2.end(); ++it)
+    {
+        if (setItems1.find(*it) == setItems1.end())
+        {
+            set2Only.insert(*it);
+        }
     }
-  }
 }
 
 // remove items that are too close
 template <class TYPE>
-void RemoveCloseNgbrs(const set<TYPE> &setItemsOrig, const TYPE &thresDist,
-                      set<TYPE> &setItemsTrimmed) {
-  // only keep items that are not too close to its predecessor
-  for (typename set<TYPE>::const_iterator it = setItemsOrig.begin();
-       it != setItemsOrig.end(); ++it) {
-    if (setItemsTrimmed.size() == 0 ||
-        *setItemsTrimmed.rbegin() + thresDist < *it) {
-      setItemsTrimmed.insert(*it);
+void RemoveCloseNgbrs(const set<TYPE> &setItemsOrig, const TYPE &thresDist, set<TYPE> &setItemsTrimmed)
+{
+    // only keep items that are not too close to its predecessor
+    for (typename set<TYPE>::const_iterator it = setItemsOrig.begin(); it != setItemsOrig.end(); ++it)
+    {
+        if (setItemsTrimmed.size() == 0 || *setItemsTrimmed.rbegin() + thresDist < *it)
+        {
+            setItemsTrimmed.insert(*it);
+        }
     }
-  }
 }
 
 // add set of item sets to map, based on their sizes
 template <class TYPE>
-void AddItemsToMapOnSizes(const set<set<TYPE> > &setItemSets,
-                          map<int, set<set<TYPE> > > &mapItemSetsOnSize) {
-  // only keep items that are not too close to its predecessor
-  mapItemSetsOnSize.clear();
-  for (typename set<set<TYPE> >::const_iterator it = setItemSets.begin();
-       it != setItemSets.end(); ++it) {
-    int sz = it->size();
-    mapItemSetsOnSize[sz].insert(*it);
-  }
+void AddItemsToMapOnSizes(const set<set<TYPE>> &setItemSets, map<int, set<set<TYPE>>> &mapItemSetsOnSize)
+{
+    // group the item sets by size: mapItemSetsOnSize[sz] collects every set that has sz items
+    mapItemSetsOnSize.clear();
+    for (typename set<set<TYPE>>::const_iterator it = setItemSets.begin(); it != setItemSets.end(); ++it)
+    {
+        int sz = it->size();
+        mapItemSetsOnSize[sz].insert(*it);
+    }
 }
 
 // add set of item sets to map, based on their sizes
 template <class TYPE>
-void FindCommonItemsInVecs(const vector<vector<TYPE> > &listVecs,
-                           vector<TYPE> &itemsCommon) {
-  itemsCommon.clear();
-  if (listVecs.size() == 0) {
-    return;
-  }
-  //
-  set<TYPE> ssCommon;
-  PopulateSetByVecGen(ssCommon, listVecs[0]);
-  for (int i = 1; i < (int)listVecs.size(); ++i) {
-    set<TYPE> ssCurr;
-    PopulateSetByVecGen(ssCurr, listVecs[i]);
-    set<TYPE> ssJoin;
-    JoinSetsGen(ssCommon, ssCurr, ssJoin);
-    ssCommon = ssJoin;
-  }
-  PopulateVecBySetGen(itemsCommon, ssCommon);
+void FindCommonItemsInVecs(const vector<vector<TYPE>> &listVecs, vector<TYPE> &itemsCommon)
+{
+    itemsCommon.clear();
+    if (listVecs.size() == 0)
+    {
+        return;
+    }
+    //
+    set<TYPE> ssCommon;
+    PopulateSetByVecGen(ssCommon, listVecs[0]);
+    for (int i = 1; i < (int)listVecs.size(); ++i)
+    {
+        set<TYPE> ssCurr;
+        PopulateSetByVecGen(ssCurr, listVecs[i]);
+        set<TYPE> ssJoin;
+        JoinSetsGen(ssCommon, ssCurr, ssJoin);
+        ssCommon = ssJoin;
+    }
+    PopulateVecBySetGen(itemsCommon, ssCommon);
 }
 
 template <class TYPE>
-void SubtractMultisetsGen(multiset<TYPE> &setMain,
-                          const multiset<TYPE> &setSubtracted) {
-  for (typename multiset<TYPE>::const_iterator it = setSubtracted.begin();
-       it != setSubtracted.end(); ++it) {
-    typename multiset<TYPE>::iterator it2 = setMain.find(*it);
-    if (it2 != setMain.end()) {
-      setMain.erase(it2);
+void SubtractMultisetsGen(multiset<TYPE> &setMain, const multiset<TYPE> &setSubtracted)
+{
+    for (typename multiset<TYPE>::const_iterator it = setSubtracted.begin(); it != setSubtracted.end(); ++it)
+    {
+        typename multiset<TYPE>::iterator it2 = setMain.find(*it);
+        if (it2 != setMain.end())
+        {
+            setMain.erase(it2);
+        }
     }
-  }
 }
 
 template <class TYPE>
-void CountMultisetsGen(const multiset<TYPE> &setMS, map<TYPE, int> &mapCounts) {
-  mapCounts.clear();
-  for (typename multiset<TYPE>::const_iterator it = setMS.begin();
-       it != setMS.end(); ++it) {
-    if (mapCounts.find(*it) == mapCounts.end()) {
-      mapCounts[*it] = 0;
+void CountMultisetsGen(const multiset<TYPE> &setMS, map<TYPE, int> &mapCounts)
+{
+    mapCounts.clear();
+    for (typename multiset<TYPE>::const_iterator it = setMS.begin(); it != setMS.end(); ++it)
+    {
+        if (mapCounts.find(*it) == mapCounts.end())
+        {
+            mapCounts[*it] = 0;
+        }
+        ++mapCounts[*it];
     }
-    ++mapCounts[*it];
-  }
 }
 
 template <class TYPE>
-void SubtractMultisetsFreqGen(multiset<TYPE> &setMain,
-                              const multiset<TYPE> &setSubtracted) {
-  map<TYPE, int> mapFreq1, mapFreq2;
-  CountMultisetsGen(setMain, mapFreq1);
-  CountMultisetsGen(setSubtracted, mapFreq2);
-  setMain.clear();
+void SubtractMultisetsFreqGen(multiset<TYPE> &setMain, const multiset<TYPE> &setSubtracted)
+{
+    map<TYPE, int> mapFreq1, mapFreq2;
+    CountMultisetsGen(setMain, mapFreq1);
+    CountMultisetsGen(setSubtracted, mapFreq2);
+    setMain.clear();
 
-  for (typename map<TYPE, int>::const_iterator it = mapFreq1.begin();
-       it != mapFreq1.end(); ++it) {
-    typename map<TYPE, int>::iterator it2 = mapFreq2.find(it->first);
-    int numItemsOut = it->second;
-    if (it2 != mapFreq2.end()) {
-      numItemsOut -= it2->second;
-    }
-    for (int i = 0; i < numItemsOut; ++i) {
-      setMain.insert(it->first);
+    for (typename map<TYPE, int>::const_iterator it = mapFreq1.begin(); it != mapFreq1.end(); ++it)
+    {
+        typename map<TYPE, int>::iterator it2 = mapFreq2.find(it->first);
+        int numItemsOut = it->second;
+        if (it2 != mapFreq2.end())
+        {
+            numItemsOut -= it2->second;
+        }
+        for (int i = 0; i < numItemsOut; ++i)
+        {
+            setMain.insert(it->first);
+        }
     }
-  }
 }
 
 template <class TYPE>
-void CreateMapForVecGen(const vector<TYPE> &vec, map<TYPE, int> &mapIndices) {
-  mapIndices.clear();
-  for (int i = 0; i < (int)vec.size(); ++i) {
-    mapIndices[vec[i]] = i;
-  }
+void CreateMapForVecGen(const vector<TYPE> &vec, map<TYPE, int> &mapIndices)
+{
+    mapIndices.clear();
+    for (int i = 0; i < (int)vec.size(); ++i)
+    {
+        mapIndices[vec[i]] = i;
+    }
 }
 
 template <class TYPE>
-void SegmentVecGen(const vector<TYPE> &vec,
-                   vector<pair<pair<int, int>, TYPE> > &listSegs) {
-  //
-  int beg = 0;
-  for (unsigned int i = 0; i < vec.size(); ++i) {
-    if (vec[i] != vec[beg] || i == (int)vec.size() - 1) {
-      // output one segment
-      int epos = i - 1;
-      if (i == (int)vec.size() - 1) {
-        epos = i;
-      }
-      pair<int, int> pp(beg, epos);
-      pair<pair<int, int>, TYPE> pp2(pp, vec[beg]);
-      listSegs.push_back(pp2);
-      beg = i;
+void SegmentVecGen(const vector<TYPE> &vec, vector<pair<pair<int, int>, TYPE>> &listSegs)
+{
+    //
+    int beg = 0;
+    for (unsigned int i = 0; i < vec.size(); ++i)
+    {
+        if (vec[i] != vec[beg] || i == (int)vec.size() - 1)
+        {
+            // output one segment
+            int epos = i - 1;
+            if (i == (int)vec.size() - 1)
+            {
+                epos = i;
+            }
+            pair<int, int> pp(beg, epos);
+            pair<pair<int, int>, TYPE> pp2(pp, vec[beg]);
+            listSegs.push_back(pp2);
+            beg = i;
+        }
     }
-  }
 }
 
 // other utilities
 int GetZeroOneDiff(int x, int y);
-void GetMatchingPosIntVec(const int val, const vector<int> &listVals,
-                          vector<int> &listPos);
+void GetMatchingPosIntVec(const int val, const vector<int> &listVals, vector<int> &listPos);
 void FormUnitVector(int numItems, int posUnit, vector<int> &vecUnit);
 void FormZeroVector(int numItems, vector<int> &vecUnit);
 bool AreTwoSetsCompatible(const set<int> &set1, const set<int> &set2);
-bool IsSetCompatibleWithSets(const set<int> &set1,
-                             const set<set<int> > &setSets);
-bool AreTwoSetsCompatible(const set<int> &set1, const set<int> &set2,
-                          int numTotElem);
-bool IsSetCompatibleWithSets(const set<int> &set1,
-                             const set<set<int> > &setSets, int numTotElem);
-void GetSetsIntParts(const set<int> &set1, const set<int> &set2,
-                     const set<int> &setAll, set<int> &set1Only,
-                     set<int> &set2Only, set<int> &set12, set<int> &setNone);
-bool IsSignificantFraction(int totNum, int numTypes, int numOneType,
-                           double minFrac = -1.0);
+bool IsSetCompatibleWithSets(const set<int> &set1, const set<set<int>> &setSets);
+bool AreTwoSetsCompatible(const set<int> &set1, const set<int> &set2, int numTotElem);
+bool IsSetCompatibleWithSets(const set<int> &set1, const set<set<int>> &setSets, int numTotElem);
+void GetSetsIntParts(const set<int> &set1, const set<int> &set2, const set<int> &setAll, set<int> &set1Only, set<int> &set2Only, set<int> &set12, set<int> &setNone);
+bool IsSignificantFraction(int totNum, int numTypes, int numOneType, double minFrac = -1.0);
 void IncAllNumInSet(set<int> &sint);
 void DecAllNumInSet(set<int> &sint);
-void IncAllNumInSets(set<set<int> > &setInts);
+void IncAllNumInSets(set<set<int>> &setInts);
 void GetNonZeroPosofVec(const vector<int> &vec, set<int> &setpos);
-void GetDiffPosOfTwoVec(const vector<int> &vec1, const vector<int> &vec2,
-                        set<int> &setpos);
+void GetDiffPosOfTwoVec(const vector<int> &vec1, const vector<int> &vec2, set<int> &setpos);
 int GetSegIndex(int val, const vector<int> &listSegSizes);
 void ComplementBoolVec(vector<bool> &listVals);
-void GetAllGridPoints(int gridLB, int gridUB, int dimGrid,
-                      set<vector<int> > &setGridPts);
-// void ReduceContainerSetsForSets(vector<set<int> > &listSets);
-void MapIntListToAnother(const vector<int> &vec1, const vector<int> &vec2,
-                         map<int, int> &mapVec1IndexToVec2);
-void FindEvenDistriPoints(double valMin, double valMax, double valResolution,
-                          int maxNumPoints, vector<double> &listChosenVals);
+void GetAllGridPoints(int gridLB, int gridUB, int dimGrid, set<vector<int>> &setGridPts);
+//void ReduceContainerSetsForSets(vector<set<int> > &listSets);
+void MapIntListToAnother(const vector<int> &vec1, const vector<int> &vec2, map<int, int> &mapVec1IndexToVec2);
+void FindEvenDistriPoints(double valMin, double valMax, double valResolution, int maxNumPoints, vector<double> &listChosenVals);
 double CalcProductBetween(int lb, int ub);
-void CreateClustersFromMultisets(
-    const multiset<multiset<int> > &setMultisets,
-    map<multiset<int>, vector<multiset<int> > > &mapMultisetClusters);
+void CreateClustersFromMultisets(const multiset<multiset<int>> &setMultisets, map<multiset<int>, vector<multiset<int>>> &mapMultisetClusters);
 void CountMultiset(const multiset<int> &s1, map<int, int> &msMap);
 bool IsMultisetContainedIn(const multiset<int> &s1, const multiset<int> &s2);
 void DumpIntMultiset(const multiset<int> &ms);
-void OutputStringsToFile(const char *filename,
-                         const vector<string> &listStrsOut);
-// void ConvIntToVecGen( unsigned int val, vector<int> &vec, int numBits, int
-// base);
+void OutputStringsToFile(const char *filename, const vector<string> &listStrsOut);
+//void ConvIntToVecGen( unsigned int val, vector<int> &vec, int numBits, int base);
 unsigned int ConvVecToIntGen(const vector<int> &vec, int base);
-// void ConvIntToVecMSBGen( unsigned int val, vector<int> &vec, int numBits, int
-// base);
+//void ConvIntToVecMSBGen( unsigned int val, vector<int> &vec, int numBits, int base);
 unsigned int ConvVecToIntGenMSB(const vector<int> &vec, int base);
 int ConvVecToIntGenBounds(const vector<int> &vec, const vector<int> &bounds);
 void ConvIntToVecGen(int val, const vector<int> &bounds, vector<int> &vec);
-int ConvRowMajorPosVecToIntGenBounds(const vector<int> &vec,
-                                     const vector<int> &bounds);
-void ConvRowMajorIntPosToVecGen(int val, const vector<int> &bounds,
-                                vector<int> &vec);
-void ClusterLinearPoints(const vector<double> &listPoints,
-                         double ratioMaxInOutCmp, vector<int> &listBkpts);
-void FindConsecutiveIntervals(const set<int> &setItems,
-                              vector<pair<int, int> > &listIVs);
+int ConvRowMajorPosVecToIntGenBounds(const vector<int> &vec, const vector<int> &bounds);
+void ConvRowMajorIntPosToVecGen(int val, const vector<int> &bounds, vector<int> &vec);
+void ClusterLinearPoints(const vector<double> &listPoints, double ratioMaxInOutCmp, vector<int> &listBkpts);
+void FindConsecutiveIntervals(const set<int> &setItems, vector<pair<int, int>> &listIVs);
 void ComplementIntSet(int numTot, set<int> &setToComp);
-void GetCountsItems(int range, const set<int> &listNumbers,
-                    vector<int> &listCnts);
-void FindGapBlocksWithinPosVec(const vector<int> &posvec, int numItemsEnum,
-                               int numItemsGap,
-                               vector<pair<int, int> > &listSegs);
+void GetCountsItems(int range, const set<int> &listNumbers, vector<int> &listCnts);
+void FindGapBlocksWithinPosVec(const vector<int> &posvec, int numItemsEnum, int numItemsGap, vector<pair<int, int>> &listSegs);
 
 // bits operation
 bool IsBitSetInt(int val, int posBit);
diff --git a/trisicell/external/scistree/UtilsNumerical.cpp b/trisicell/external/scistree/UtilsNumerical.cpp
index 08444ae..ad308d5 100644
--- a/trisicell/external/scistree/UtilsNumerical.cpp
+++ b/trisicell/external/scistree/UtilsNumerical.cpp
@@ -1,7 +1,7 @@
 #include "UtilsNumerical.h"
+#include <cstdlib>
 #include "Utils3.h"
 #include <cmath>
-#include <cstdlib>
 
 // Some matrix utilities
 // YW: seem to be some risk of memory issue: not freeing???
@@ -66,43 +66,38 @@ T MatrixPermanent(const vector<T>& A, int n)
 
 #endif
 
-///////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////
 
-double NumericalAlgoUtils ::Func1DMinBrent(double ax, double bx, double cx,
-                                           double tol, double *xmin) {
-  // cout << "Func1DMinBrent: " << ", [" << ax << ", " << bx << ", " << cx  <<
-  // ", tol  " << tol << "], \n";
-  // YW: this function is based Numerical Receipe in C book.
-  // search for best 1 D function (in this case, the likelihood) using Brent's
-  // method
-  // Given a function f, and given a bracketing triplet of abscissas ax, bx, cx
-  // (such that bx is between ax and cx, and f(bx) is less than both f(ax) and
-  // f(cx)), this routine isolates the minimum to a fractional precision of
-  // about tol using Brent�s method. The abscissa of the minimum is returned as
-  // xmin, and the minimum function value is returned as brent, the returned
-  // function value.
+double NumericalAlgoUtils ::Func1DMinBrent(double ax, double bx, double cx, double tol, double *xmin)
+{
+	//cout << "Func1DMinBrent: " << ", [" << ax << ", " << bx << ", " << cx  << ", tol  " << tol << "], \n";
+	// YW: this function is based on the Numerical Recipes in C book.
+	// search for best 1 D function (in this case, the likelihood) using Brent's method
+	//Given a function f, and given a bracketing triplet of abscissas ax, bx, cx (such that bx is
+	//between ax and cx, and f(bx) is less than both f(ax) and f(cx)), this routine isolates
+	//the minimum to a fractional precision of about tol using Brent's method. The abscissa of
+	//the minimum is returned as xmin, and the minimum function value is returned as brent, the
+	//returned function value.
 #define ITMAX 100
 #define CGOLD 0.3819660
 #define ZEPS 1.0e-10
-// Here ITMAX is the maximum allowed number of iterations; CGOLD is the golden
-// ratio; ZEPS is a small number that protects against trying to achieve
-// fractional accuracy for a minimum that happens to be exactly zero.
-#define SHFT(a, b, c, d)                                                       \
-  (a) = (b);                                                                   \
-  (b) = (c);                                                                   \
-  (c) = (d);
+//Here ITMAX is the maximum allowed number of iterations; CGOLD is the golden ratio; ZEPS is
+//a small number that protects against trying to achieve fractional accuracy for a minimum that
+//happens to be exactly zero.
+#define SHFT(a, b, c, d) \
+	(a) = (b);           \
+	(b) = (c);           \
+	(c) = (d);
 #define SIGN(a, b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
 
-  // cout << "Func1DMinBrent: ax=" << ax << ", bx=" << bx << ", cx=" << cx << ",
-  // tol=" << tol << endl;
-  int iter;
-  double a, b, d = 0.0, etemp, fu, fv, fw, fx, p, q, r, tol1, tol2, u, v, w, x,
-               xm;
-  double e = 0.0; // This will be the distance moved on the step before last.
-  a = (ax < cx ? ax : cx); // a and b must be in ascending order,
-  b = (ax > cx ? ax : cx); // but input abscissas need not be.
-  x = w = v = bx;          // Initializations...
-  fw = fv = fx = EvaluateAt(x, NULL);
+	//cout << "Func1DMinBrent: ax=" << ax << ", bx=" << bx << ", cx=" << cx << ", tol=" << tol << endl;
+	int iter;
+	double a, b, d = 0.0, etemp, fu, fv, fw, fx, p, q, r, tol1, tol2, u, v, w, x, xm;
+	double e = 0.0;			 // This will be the distance moved on the step before last.
+	a = (ax < cx ? ax : cx); //a and b must be in ascending order,
+	b = (ax > cx ? ax : cx); // but input abscissas need not be.
+	x = w = v = bx;			 //Initializations...
+	fw = fv = fx = EvaluateAt(x, NULL);
 
 #if 0
 	// in case f(a) < f(b) < f(c), stop
@@ -123,165 +118,187 @@ cout << "fa1 = " << fa1 << " for a = " << a << ", fb1= " << fb1 << " for b = " <
 	}
 #endif
 
-  for (iter = 1; iter <= ITMAX; iter++) { // Main program loop.
-    // cout << "iteration " << iter << endl;
-    xm = 0.5 * (a + b);
-    tol2 = 2.0 * (tol1 = tol * fabs(x) + ZEPS);
-    if (fabs(x - xm) <= (tol2 - 0.5 * (b - a))) { // Test for done here.
-      *xmin = x;
-      // cout << "x = " << x << ", xm = " << xm << ", tol2 = " << tol2 << ", b =
-      // " << b << ", a = " << a << endl; cout << "Here: STOP EARLY\n";
-      return fx;
-    }
-    if (fabs(e) > tol1) { // Construct a trial parabolic fit.
-                          // cout << "here...\n";
-      r = (x - w) * (fx - fv);
-      q = (x - v) * (fx - fw);
-      p = (x - v) * q - (x - w) * r;
-      q = 2.0 * (q - r);
-      if (q > 0.0)
-        p = -p;
-      q = fabs(q);
-      etemp = e;
-      e = d;
-      if (fabs(p) >= fabs(0.5 * q * etemp) || p <= q * (a - x) ||
-          p >= q * (b - x))
-        d = CGOLD * (e = (x >= xm ? a - x : b - x));
-      // The above conditions determine the acceptability of the parabolic fit.
-      // Here we take the golden section step into the larger of the two
-      // segments.
-      else {
-        d = p / q; // Take the parabolic step.
-        u = x + d;
-        if (u - a < tol2 || b - u < tol2)
-          d = SIGN(tol1, xm - x);
-      }
-    } else {
-      // cout << "here2\n";
-      d = CGOLD * (e = (x >= xm ? a - x : b - x));
-    }
-    u = (fabs(d) >= tol1 ? x + d : x + SIGN(tol1, d));
-    // cout << "u=" << u << endl;
-    fu = EvaluateAt(u, NULL);
-    // This is the one function evaluation per iteration.
-    if (fu <= fx) { // Now decide what to do with our func
-      if (u >= x)
-        a = x;
-      else
-        b = x;         // tion evaluation.
-      SHFT(v, w, x, u) // Housekeeping follows:
-      SHFT(fv, fw, fx, fu)
-    } else {
-      if (u < x)
-        a = u;
-      else
-        b = u;
-      if (fu <= fw || w == x) {
-        v = w;
-        w = u;
-        fv = fw;
-        fw = fu;
-      } else if (fu <= fv || v == x || v == w) {
-        v = u;
-        fv = fu;
-      }
-    } // Done with housekeeping. Back for
-    // cout << "** -fx = " << -1.0*fx << endl;
-  } // another iteration.
-  // YW_ASSERT_INFO(false, "Too many iterations in brent");
-  cout << "WARNING: Too many iterations in brent.\n";
-  *xmin = x; // Never get here.
-  return fx;
+	for (iter = 1; iter <= ITMAX; iter++)
+	{	//Main program loop.
+		//cout << "iteration " << iter << endl;
+		xm = 0.5 * (a + b);
+		tol2 = 2.0 * (tol1 = tol * fabs(x) + ZEPS);
+		if (fabs(x - xm) <= (tol2 - 0.5 * (b - a)))
+		{ //Test for done here.
+			*xmin = x;
+			//cout << "x = " << x << ", xm = " << xm << ", tol2 = " << tol2 << ", b = " << b << ", a = " << a << endl;
+			//cout << "Here: STOP EARLY\n";
+			return fx;
+		}
+		if (fabs(e) > tol1)
+		{	// Construct a trial parabolic fit.
+			//cout << "here...\n";
+			r = (x - w) * (fx - fv);
+			q = (x - v) * (fx - fw);
+			p = (x - v) * q - (x - w) * r;
+			q = 2.0 * (q - r);
+			if (q > 0.0)
+				p = -p;
+			q = fabs(q);
+			etemp = e;
+			e = d;
+			if (fabs(p) >= fabs(0.5 * q * etemp) || p <= q * (a - x) || p >= q * (b - x))
+				d = CGOLD * (e = (x >= xm ? a - x : b - x));
+			//The above conditions determine the acceptability of the parabolic fit. Here we
+			//take the golden section step into the larger of the two segments.
+			else
+			{
+				d = p / q; //Take the parabolic step.
+				u = x + d;
+				if (u - a < tol2 || b - u < tol2)
+					d = SIGN(tol1, xm - x);
+			}
+		}
+		else
+		{
+			//cout << "here2\n";
+			d = CGOLD * (e = (x >= xm ? a - x : b - x));
+		}
+		u = (fabs(d) >= tol1 ? x + d : x + SIGN(tol1, d));
+		//cout << "u=" << u << endl;
+		fu = EvaluateAt(u, NULL);
+		//This is the one function evaluation per iteration.
+		if (fu <= fx)
+		{ //Now decide what to do with our func
+			if (u >= x)
+				a = x;
+			else
+				b = x;		 //tion evaluation.
+			SHFT(v, w, x, u) //Housekeeping follows:
+			SHFT(fv, fw, fx, fu)
+		}
+		else
+		{
+			if (u < x)
+				a = u;
+			else
+				b = u;
+			if (fu <= fw || w == x)
+			{
+				v = w;
+				w = u;
+				fv = fw;
+				fw = fu;
+			}
+			else if (fu <= fv || v == x || v == w)
+			{
+				v = u;
+				fv = fu;
+			}
+		} //Done with housekeeping. Back for
+		  //cout << "** -fx = " << -1.0*fx << endl;
+	}	  //another iteration.
+		  //YW_ASSERT_INFO(false, "Too many iterations in brent");
+	cout << "WARNING: Too many iterations in brent.\n";
+	*xmin = x; //Reached only when ITMAX iterations pass without meeting the tolerance test.
+	return fx;
 }
 
-bool NumericalAlgoUtils ::IsSignificantlyLarge(double v1, double v2) const {
-  // is v1 significantly larger than v2 (i.e. larger by some threshold)?
-  // by default, the computed values are in log-space, and thus we ask then to
-  // differ by at least 5%
-  const double thresDef = log(1.05);
-  return v1 >= v2 + thresDef;
+bool NumericalAlgoUtils ::IsSignificantlyLarge(double v1, double v2) const
+{
+	// is v1 significantly larger than v2 (i.e. larger by some threshold)?
+	// by default, the computed values are in log-space, and thus we ask them to differ by at least 5%
+	const double thresDef = log(1.05);
+	return v1 >= v2 + thresDef;
 }
 
-bool NumericalAlgoUtils ::IsLikeliSignificantlyLargeThresNum(double valLikeli1,
-                                                             double valLikeli2,
-                                                             int numItems,
-                                                             double thres) {
-  // assume both are log-likelihood; thres: log(1.05) say
-  // is likeli1 (per item) is significantly larger than likeli2 (per item)?
-  double valLikeli1Ave = valLikeli1 / numItems;
-  double valLikeli2Ave = valLikeli2 / numItems;
-  return valLikeli1Ave >= valLikeli2Ave + thres;
+bool NumericalAlgoUtils ::IsLikeliSignificantlyLargeThresNum(double valLikeli1, double valLikeli2, int numItems, double thres)
+{
+	// assume both are log-likelihood; thres: log(1.05) say
+	// is likeli1 (per item) significantly larger than likeli2 (per item)?
+	double valLikeli1Ave = valLikeli1 / numItems;
+	double valLikeli2Ave = valLikeli2 / numItems;
+	return valLikeli1Ave >= valLikeli2Ave + thres;
 }
 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////
-double RoundDoubleValTo(double val, int numFractionDigits) {
-  // numFractiondigits: how many digits after . we want to keep
-  YW_ASSERT_INFO(numFractionDigits >= 0, "numFracDigits:; must be positive");
-  double ratioInc = pow(10.0, numFractionDigits);
-  return round(val * ratioInc) / ratioInc;
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+double RoundDoubleValTo(double val, int numFractionDigits)
+{
+	// numFractiondigits: how many digits after . we want to keep
+	YW_ASSERT_INFO(numFractionDigits >= 0, "numFracDigits:; must be positive");
+	double ratioInc = pow(10.0, numFractionDigits);
+	return round(val * ratioInc) / ratioInc;
 }
 
-int GetCeilingPowerOf(int val, int base) {
-  // given a value, find the smallest (positive) power of base that is at least
-  // this value
-  int res = 1;
-  while (val > res) {
-    res *= base;
-  }
-  return res;
+int GetCeilingPowerOf(int val, int base)
+{
+	// given a value, find the smallest (positive) power of base that is at least this value
+	int res = 1;
+	while (val > res)
+	{
+		res *= base;
+	}
+	return res;
 }
 
 // statistics related
-double CalcApproxCDFStdNormal(double val) {
-  //
-  const double pi = 3.1415926535897;
-  double sign = 1.0;
-  if (val < 0) {
-    sign = -1.0;
-  }
-  return 0.5 * (1.0 + sign * (sqrt(1.0 - exp(-2.0 * val * val / pi))));
+double CalcApproxCDFStdNormal(double val)
+{
+	//
+	const double pi = 3.1415926535897;
+	double sign = 1.0;
+	if (val < 0)
+	{
+		sign = -1.0;
+	}
+	return 0.5 * (1.0 + sign * (sqrt(1.0 - exp(-2.0 * val * val / pi))));
 }
 
-double CalcBinomialProb(double p, int n, int k) {
-  YW_ASSERT_INFO(k <= n, "CalcBinomialProb: k must be smaller than n");
-  double res = pow(p, k) * pow(1.0 - p, n - k) * CalcNumNChooseK(n, k);
-  return res;
+double CalcBinomialProb(double p, int n, int k)
+{
+	YW_ASSERT_INFO(k <= n, "CalcBinomialProb: k must be smaller than n");
+	double res = pow(p, k) * pow(1.0 - p, n - k) * CalcNumNChooseK(n, k);
+	return res;
 }
 
-int RoundToInt(double val) { return (int)(val + 0.5); }
+int RoundToInt(double val)
+{
+	return (int)(val + 0.5);
+}
 
-bool IsConvergedWithin(double valCurr, double valPre, double maxDiffFrac) {
-  double valDiff = std::abs(valCurr - valPre);
-  double valBase1 = std::abs(valCurr);
-  double valBase2 = std::abs(valPre);
-  double valBase = std::max(valBase1, valBase2);
-  return valDiff <= maxDiffFrac * valBase;
+bool IsConvergedWithin(double valCurr, double valPre, double maxDiffFrac)
+{
+	double valDiff = std::abs(valCurr - valPre);
+	double valBase1 = std::abs(valCurr);
+	double valBase2 = std::abs(valPre);
+	double valBase = std::max(valBase1, valBase2);
+	return valDiff <= maxDiffFrac * valBase;
 }
 
-void NormalizeVec(vector<double> &vecDoubles) {
-  double sum = GetSumOfElements(vecDoubles);
-  YW_ASSERT_INFO(sum > 0.0, "Cannot normalize a zero vector");
-  for (int i = 0; i < (int)vecDoubles.size(); ++i) {
-    vecDoubles[i] = vecDoubles[i] / sum;
-  }
+void NormalizeVec(vector<double> &vecDoubles)
+{
+	double sum = GetSumOfElements(vecDoubles);
+	YW_ASSERT_INFO(sum > 0.0, "Cannot normalize a zero vector");
+	for (int i = 0; i < (int)vecDoubles.size(); ++i)
+	{
+		vecDoubles[i] = vecDoubles[i] / sum;
+	}
 }
 
-double CalcSumOfSquareError(const vector<double> &vecDoubles1,
-                            const vector<double> &vecDoubles2) {
-  //
-  double res = 0.0;
-  YW_ASSERT_INFO(vecDoubles1.size() == vecDoubles2.size(), "Sizes don't match");
-  for (int i = 0; i < (int)vecDoubles1.size(); ++i) {
-    double diff = vecDoubles1[i] - vecDoubles2[i];
-    res += diff * diff;
-  }
-  return res;
+double CalcSumOfSquareError(const vector<double> &vecDoubles1, const vector<double> &vecDoubles2)
+{
+	//
+	double res = 0.0;
+	YW_ASSERT_INFO(vecDoubles1.size() == vecDoubles2.size(), "Sizes don't match");
+	for (int i = 0; i < (int)vecDoubles1.size(); ++i)
+	{
+		double diff = vecDoubles1[i] - vecDoubles2[i];
+		res += diff * diff;
+	}
+	return res;
 }
 
-double CalcFactorial(int n) {
-  double res = 1.0;
-  for (int i = 2; i <= n; ++i) {
-    res *= i;
-  }
-  return res;
+double CalcFactorial(int n)
+{
+	double res = 1.0;
+	for (int i = 2; i <= n; ++i)
+	{
+		res *= i;
+	}
+	return res;
 }
diff --git a/trisicell/external/scistree/UtilsNumerical.h b/trisicell/external/scistree/UtilsNumerical.h
index 69e250a..93aa2b7 100644
--- a/trisicell/external/scistree/UtilsNumerical.h
+++ b/trisicell/external/scistree/UtilsNumerical.h
@@ -2,8 +2,8 @@
 #define UTILS_NUMERICAL_H
 
 #include "Utils.h"
-#include <cmath>
 #include <vector>
+#include <cmath>
 using namespace std;
 
 // someuseful definitions
@@ -13,87 +13,94 @@ const double MIN_POS_VAL = 1.0e-40;
 
 // Some matrix utilities
 
-// template<class T>
-// T MatrixPermanent(const vector<T>& A, int n); // expects n by n matrix
-// encoded as vector
-inline int *dec2binarr(long n, int dim) {
-  // note: res[dim] will save the sum res[0]+...+res[dim-1]
-  int *res = (int *)calloc(dim + 1, sizeof(int));
-  int pos = dim - 1;
-
-  // note: this will crash if dim < log_2(n)...
-  while (n > 0) {
-    res[pos] = n % 2;
-    res[dim] += res[pos];
-    n = n / 2; // integer division
-    pos--;
-  }
-
-  return res;
+//template<class T>
+//T MatrixPermanent(const vector<T>& A, int n); // expects n by n matrix encoded as vector
+inline int *dec2binarr(long n, int dim)
+{
+    // Binary digits of n, most significant first, in res[0..dim-1]; n <= 0 leaves them all zero.
+    // note: res[dim] will save the sum res[0]+...+res[dim-1]; res is calloc'ed, so the caller must free() it
+    int *res = (int *)calloc(dim + 1, sizeof(int));
+    int pos = dim - 1;
+    while (n > 0)
+    {
+        // n has more than dim binary digits: report it instead of writing before the start of res
+        YW_ASSERT_INFO(pos >= 0, "dec2binarr: dim is too small for n");
+        res[pos] = n % 2;
+        res[dim] += res[pos];
+        n = n / 2; // integer division
+        pos--;
+    }
+    return res;
 }
 
-template <class T> T MatrixPermanent(const vector<T> &A, int n) {
-  // cout << "MatrixPermanent: n = " << n << endl;
-  // expects n by n matrix encoded as vector
-  T sum = 0;
-  T rowsumprod, rowsum;
-  // int* chi = new int[n + 1];
-  int *chi;
-  double C = (double)pow((double)2, n);
-
-  // loop all 2^n submatrices of A
-  for (int k = 1; k < C; k++) {
-    // cout << "k = " << k << endl;
-    rowsumprod = 1;
-    chi = dec2binarr(k, n); // characteristic vector
-
-    // loop columns of submatrix #k
-    for (int m = 0; m < n; m++) {
-      // cout << "m = " << m << endl;
-      rowsum = 0;
-
-      // loop rows and compute rowsum
-      for (int p = 0; p < n; p++) {
-        // cout << "p = " << p << endl;
-        YW_ASSERT_INFO(m * n + p < (int)A.size(), "array out of bound");
-        rowsum += chi[p] * A[m * n + p];
-      }
-      // update product of rowsums
-      rowsumprod *= rowsum;
-
-      // (optional -- use for sparse matrices)
-      // if (rowsumprod == 0) break;
+template <class T>
+T MatrixPermanent(const vector<T> &A, int n)
+{
+    //cout << "MatrixPermanent: n = " << n << endl;
+    // expects n by n matrix encoded as vector, read as A[m * n + p]; returns the signed sum accumulated over all non-empty index subsets
+    T sum = 0;
+    T rowsumprod, rowsum;
+    //int* chi = new int[n + 1];
+    int *chi;
+    double C = (double)pow((double)2, n); // 2^n as a double; the int counter k below is compared against it
+
+    // loop over k = 1 .. 2^n - 1, i.e. every non-empty subset of the n indices p
+    for (int k = 1; k < C; k++)
+    {
+        //cout << "k = " << k << endl;
+        rowsumprod = 1;
+        chi = dec2binarr(k, n); // characteristic vector of subset k (chi[n] = number of selected indices); freed at the end of this iteration
+
+        // loop columns of submatrix #k
+        for (int m = 0; m < n; m++)
+        {
+            //cout << "m = " << m << endl;
+            rowsum = 0;
+
+            // loop rows and compute rowsum
+            for (int p = 0; p < n; p++)
+            {
+                //cout << "p = " << p << endl;
+                YW_ASSERT_INFO(m * n + p < (int)A.size(), "array out of bound");
+                rowsum += chi[p] * A[m * n + p];
+            }
+            // update product of rowsums
+            rowsumprod *= rowsum;
+
+            // (optional -- use for sparse matrices)
+            // if (rowsumprod == 0) break;
+        }
+
+        sum += (T)pow((double)-1, n - chi[n]) * rowsumprod; // sign is (-1)^(n - number of selected indices)
+        free(chi); // chi was allocated by dec2binarr for this k only
     }
 
-    sum += (T)pow((double)-1, n - chi[n]) * rowsumprod;
-    free(chi);
-  }
-
-  // delete [] chi;
+    //delete [] chi;
 
-  return sum;
+    return sum;
 }
 
 // compute the product
-template <class T> T CalcProductOfVec(const vector<T> &A) {
-  YW_ASSERT_INFO(A.size() > 0, "Must have at least one item");
-  T res = A[0];
-  for (int i = 1; i < (int)A.size(); ++i) {
-    res *= A[i];
-  }
-  return res;
+template <class T>
+T CalcProductOfVec(const vector<T> &A) // product of all entries of A, accumulated left to right starting from A[0]
+{
+    YW_ASSERT_INFO(A.size() > 0, "Must have at least one item"); // A[0] is read unconditionally on the next line
+    T res = A[0];
+    for (int i = 1; i < (int)A.size(); ++i)
+    {
+        res *= A[i];
+    }
+    return res;
 }
 
 // useful algorithms like Brent's method
-class NumericalAlgoUtils {
+class NumericalAlgoUtils // abstract base: EvaluateAt is pure virtual, so a subclass must supply the function being evaluated
+{
 public:
-  virtual double EvaluateAt(double pt, void *pParam) = 0;
-  double Func1DMinBrent(double ax, double bx, double cx, double tol,
-                        double *xmin);
-  virtual bool IsSignificantlyLarge(double v1, double v2) const;
-  static bool IsLikeliSignificantlyLargeThresNum(double valLikeli1,
-                                                 double valLikeli2,
-                                                 int numItems, double thres);
+    virtual double EvaluateAt(double pt, void *pParam) = 0; // value at pt; pParam is an opaque pointer whose meaning this header does not define
+    double Func1DMinBrent(double ax, double bx, double cx, double tol, double *xmin); // Brent's method; presumably ax/bx/cx bracket the minimum and its location goes to *xmin -- TODO confirm in the .cpp
+    virtual bool IsSignificantlyLarge(double v1, double v2) const; // overridable comparison; the criterion lives in the .cpp
+    static bool IsLikeliSignificantlyLargeThresNum(double valLikeli1, double valLikeli2, int numItems, double thres); // NOTE(review): looks like a thresholded likelihood comparison -- confirm in the .cpp
 };
 
 // statistics related
@@ -106,8 +113,7 @@ int GetCeilingPowerOf(int val, int base);
 int RoundToInt(double val);
 bool IsConvergedWithin(double valCurr, double valPre, double maxDiffFrac);
 void NormalizeVec(vector<double> &vecDoubles);
-double CalcSumOfSquareError(const vector<double> &vecDoubles1,
-                            const vector<double> &vecDoubles2);
+double CalcSumOfSquareError(const vector<double> &vecDoubles1, const vector<double> &vecDoubles2);
 double CalcFactorial(int n);
 
 #endif
diff --git a/trisicell/external/scistree/ctpl_stl.h b/trisicell/external/scistree/ctpl_stl.h
new file mode 100644
index 0000000..c6766c7
--- /dev/null
+++ b/trisicell/external/scistree/ctpl_stl.h
@@ -0,0 +1,282 @@
+/*********************************************************
+*
+*  Copyright (C) 2014 by Vitaliy Vitsentiy
+*
+*  Licensed under the Apache License, Version 2.0 (the "License");
+*  you may not use this file except in compliance with the License.
+*  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+*  Unless required by applicable law or agreed to in writing, software
+*  distributed under the License is distributed on an "AS IS" BASIS,
+*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+*  See the License for the specific language governing permissions and
+*  limitations under the License.
+*
+*********************************************************/
+
+#ifndef __ctpl_stl_thread_pool_H__
+#define __ctpl_stl_thread_pool_H__
+
+#include <functional>
+#include <thread>
+#include <atomic>
+#include <vector>
+#include <memory>
+#include <exception>
+#include <future>
+#include <mutex>
+#include <queue>
+
+// thread pool to run user's functors with signature
+//      ret func(int id, other_params)
+// where id is the index of the thread that runs the functor
+// ret is some return type
+
+namespace ctpl
+{
+
+    namespace detail
+    {
+        template <typename T> // std::queue<T> wrapper: every member takes the internal mutex for its whole body
+        class Queue
+        {
+        public:
+            bool push(T const &value) // appends a copy of value; always returns true
+            {
+                std::unique_lock<std::mutex> lock(this->mutex);
+                this->q.push(value);
+                return true;
+            }
+            // copies the front element into v and removes it; returns false and leaves v untouched when empty (upstream note: do not use for non integral types)
+            bool pop(T &v)
+            {
+                std::unique_lock<std::mutex> lock(this->mutex);
+                if (this->q.empty())
+                    return false;
+                v = this->q.front();
+                this->q.pop();
+                return true;
+            }
+            bool empty() // snapshot only: the lock is released on return, so the answer may be stale immediately
+            {
+                std::unique_lock<std::mutex> lock(this->mutex);
+                return this->q.empty();
+            }
+
+        private:
+            std::queue<T> q;
+            std::mutex mutex;
+        };
+    }
+
+    class thread_pool // worker threads that run queued std::function<void(int)> tasks, passing each task the index of the worker running it
+    {
+
+    public:
+        thread_pool() { this->init(); } // empty pool: workers are only created by resize()
+        thread_pool(int nThreads) // pool that immediately starts nThreads workers
+        {
+            this->init();
+            this->resize(nThreads);
+        }
+
+        // the destructor waits for all the functions in the queue to be finished
+        ~thread_pool()
+        {
+            this->stop(true);
+        }
+
+        // get the number of running threads in the pool
+        int size() { return static_cast<int>(this->threads.size()); }
+
+        // number of idle threads
+        int n_idle() { return this->nWaiting; }
+        std::thread &get_thread(int i) { return *this->threads[i]; } // i is not range-checked
+
+        // change the number of threads in the pool
+        // should be called from one thread, otherwise be careful to not interleave, also with this->stop()
+        // nThreads must be >= 0
+        void resize(int nThreads)
+        {
+            if (!this->isStop && !this->isDone)
+            {
+                int oldNThreads = static_cast<int>(this->threads.size());
+                if (oldNThreads <= nThreads)
+                { // if the number of threads is increased
+                    this->threads.resize(nThreads);
+                    this->flags.resize(nThreads);
+
+                    for (int i = oldNThreads; i < nThreads; ++i)
+                    {
+                        this->flags[i] = std::make_shared<std::atomic<bool>>(false);
+                        this->set_thread(i);
+                    }
+                }
+                else
+                { // the number of threads is decreased
+                    for (int i = oldNThreads - 1; i >= nThreads; --i)
+                    {
+                        *this->flags[i] = true; // this thread will finish
+                        this->threads[i]->detach();
+                    }
+                    {
+                        // stop the detached threads that were waiting
+                        std::unique_lock<std::mutex> lock(this->mutex);
+                        this->cv.notify_all();
+                    }
+                    this->threads.resize(nThreads); // safe to delete because the threads are detached
+                    this->flags.resize(nThreads);   // safe to delete because the threads have copies of shared_ptr of the flags, not originals
+                }
+            }
+        }
+
+        // empty the queue
+        void clear_queue()
+        {
+            std::function<void(int id)> *_f;
+            while (this->q.pop(_f))
+                delete _f; // empty the queue
+        }
+
+        // pops a functional wrapper to the original function
+        std::function<void(int)> pop()
+        {
+            std::function<void(int id)> *_f = nullptr;
+            this->q.pop(_f);
+            std::unique_ptr<std::function<void(int id)>> func(_f); // at return, delete the function even if an exception occurred
+            std::function<void(int)> f;
+            if (_f)
+                f = *_f;
+            return f;
+        }
+
+        // wait for all computing threads to finish and stop all threads
+        // may be called asynchronously to not pause the calling thread while waiting
+        // if isWait == true, all the functions in the queue are run, otherwise the queue is cleared without running the functions
+        void stop(bool isWait = false)
+        {
+            if (!isWait)
+            {
+                if (this->isStop)
+                    return;
+                this->isStop = true;
+                for (int i = 0, n = this->size(); i < n; ++i)
+                {
+                    *this->flags[i] = true; // command the threads to stop
+                }
+                this->clear_queue(); // empty the queue
+            }
+            else
+            {
+                if (this->isDone || this->isStop)
+                    return;
+                this->isDone = true; // give the waiting threads a command to finish
+            }
+            {
+                std::unique_lock<std::mutex> lock(this->mutex);
+                this->cv.notify_all(); // stop all waiting threads
+            }
+            for (int i = 0; i < static_cast<int>(this->threads.size()); ++i)
+            { // wait for the computing threads to finish
+                if (this->threads[i]->joinable())
+                    this->threads[i]->join();
+            }
+            // if there were no threads in the pool but some functors in the queue, the functors are not deleted by the threads
+            // therefore delete them here
+            this->clear_queue();
+            this->threads.clear();
+            this->flags.clear();
+        }
+
+        template <typename F, typename... Rest> // queues f(id, rest...); the returned future delivers its result or rethrows its exception
+        auto push(F &&f, Rest &&...rest) -> std::future<decltype(f(0, rest...))>
+        {
+            auto pck = std::make_shared<std::packaged_task<decltype(f(0, rest...))(int)>>(
+                std::bind(std::forward<F>(f), std::placeholders::_1, std::forward<Rest>(rest)...));
+            auto _f = new std::function<void(int id)>([pck](int id)
+                                                      { (*pck)(id); });
+            this->q.push(_f);
+            std::unique_lock<std::mutex> lock(this->mutex);
+            this->cv.notify_one();
+            return pck->get_future();
+        }
+
+        // run the user's function that accepts argument int - id of the running thread. returned value is templatized
+        // operator returns std::future, where the user can get the result and rethrow the caught exceptions
+        template <typename F>
+        auto push(F &&f) -> std::future<decltype(f(0))>
+        {
+            auto pck = std::make_shared<std::packaged_task<decltype(f(0))(int)>>(std::forward<F>(f));
+            auto _f = new std::function<void(int id)>([pck](int id)
+                                                      { (*pck)(id); });
+            this->q.push(_f);
+            std::unique_lock<std::mutex> lock(this->mutex);
+            this->cv.notify_one();
+            return pck->get_future();
+        }
+
+    private:
+        // non-copyable and non-movable (the worker lambdas capture this); deleted so misuse fails at compile time instead of link time
+        thread_pool(const thread_pool &) = delete;
+        thread_pool(thread_pool &&) = delete;
+        thread_pool &operator=(const thread_pool &) = delete;
+        thread_pool &operator=(thread_pool &&) = delete;
+
+        void set_thread(int i) // starts worker i: it runs queued tasks until its stop flag is set, or the pool is done and the queue is empty
+        {
+            std::shared_ptr<std::atomic<bool>> flag(this->flags[i]); // a copy of the shared ptr to the flag
+            auto f = [this, i, flag /* a copy of the shared ptr to the flag */]()
+            {
+                std::atomic<bool> &_flag = *flag;
+                std::function<void(int id)> *_f;
+                bool isPop = this->q.pop(_f);
+                while (true)
+                {
+                    while (isPop)
+                    {                                                          // if there is anything in the queue
+                        std::unique_ptr<std::function<void(int id)>> func(_f); // at return, delete the function even if an exception occurred
+                        (*_f)(i);
+                        if (_flag)
+                            return; // the thread is wanted to stop, return even if the queue is not empty yet
+                        else
+                            isPop = this->q.pop(_f);
+                    }
+                    // the queue is empty here, wait for the next command
+                    std::unique_lock<std::mutex> lock(this->mutex);
+                    ++this->nWaiting;
+                    this->cv.wait(lock, [this, &_f, &isPop, &_flag]()
+                                  {
+                                      isPop = this->q.pop(_f);
+                                      return isPop || this->isDone || _flag;
+                                  });
+                    --this->nWaiting;
+                    if (!isPop)
+                        return; // if the queue is empty and this->isDone == true or *flag then return
+                }
+            };
+            this->threads[i].reset(new std::thread(f)); // compiler may not support std::make_unique()
+        }
+
+        void init()
+        {
+            this->nWaiting = 0;
+            this->isStop = false;
+            this->isDone = false;
+        }
+
+        std::vector<std::unique_ptr<std::thread>> threads;
+        std::vector<std::shared_ptr<std::atomic<bool>>> flags;
+        detail::Queue<std::function<void(int id)> *> q;
+        std::atomic<bool> isDone;
+        std::atomic<bool> isStop;
+        std::atomic<int> nWaiting; // how many threads are waiting
+
+        std::mutex mutex; // used with cv for the wait/notify handshake; the task queue has its own lock
+        std::condition_variable cv;
+    };
+
+}
+
+#endif // __ctpl_stl_thread_pool_H__
diff --git a/trisicell/external/scistree/main.cpp b/trisicell/external/scistree/main.cpp
index 0f75f25..b9f0e56 100644
--- a/trisicell/external/scistree/main.cpp
+++ b/trisicell/external/scistree/main.cpp
@@ -1,23 +1,24 @@
+#include <iostream>
+#include <fstream>
+#include <sstream>
 #include <cstdio>
 #include <cstring>
-#include <fstream>
-#include <iostream>
+#include <vector>
 #include <map>
-#include <sstream>
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
-#include <vector>
+#include <chrono>
 
 using namespace std;
 
-#include "ScistDoublet.hpp"
-#include "ScistErrRateInf.hpp"
-#include "ScistGenotype.hpp"
-#include "ScistPerfPhyImp.hpp"
-#include "ScistPerfPhyUtils.hpp"
 #include "Utils2.h"
 #include "Utils3.h"
+#include "ScistPerfPhyUtils.hpp"
+#include "ScistPerfPhyImp.hpp"
+#include "ScistGenotype.hpp"
+#include "ScistDoublet.hpp"
+#include "ScistErrRateInf.hpp"
 
 //*****************************************************************************
 // Main driving functions
@@ -26,32 +27,23 @@ using namespace std;
 // ***************************************************************************
 // Main for computing lower bound
 // ***************************************************************************
-static void Usage() {
-  cout << "Usage: ./scistree <options> <input file> " << endl;
-  cout << "Options:\n";
-  // cout << "\t -d <dn>           dn: number of allowed doublet genotypes; dc:
-  // cost of having a doublet\n";
-  cout << "\t -d <dn>           dn: number of allowed doublet genotypes\n";
-  cout << "\t -v                Turn on verbose mode  \n";
-  // cout << "\t -p                Find optimal false positive rate and false
-  // negative rate\n"; cout << "\t -l                Find cell tree with branch
-  // length (by default, constructed cell trees don't have branch length\n";
-  cout << "\t -n                Only build simple neighbor joining tree (may "
-          "be useful for very large data)\n";
-  cout << "\t -e                Output mutation tree (may not be binary tree) "
-          "from called genotypes branch labels.\n";
-  cout << "\t -e0               Output mutation tree but don't output labels "
-          "(for visualizing large trees).\n";
-  // cout << "\t -s <level>        Use SPR tree search (this will be slower);
-  // level: # of SPRs to allow (default is 1)\n";
-  cout << "\t -o <output-file>  Set output file (used for mutation tree output "
-          "(in GML) format; should have suffix .gml (default: "
-          "mutation-tree.gml)\n";
-  cout << "\t -t <threshold>    Discard somewhat ambigous genotyeps when "
-          "constructing intial trees: \n\t\t\t genotypes discarded if the "
-          "prob. of alternative genotypes is less than <threshold> "
-          "\n\t\t\t(default is 0, i.e. use all genotypes)\n";
-  exit(1);
+static void Usage() // prints the command-line help to stdout, then terminates the process with exit status 1
+{
+    cout << "Usage: ./scistree <options> <input file> " << endl;
+    cout << "Options:\n";
+    //cout << "\t -d <dn>           dn: number of allowed doublet genotypes; dc: cost of having a doublet\n";
+    cout << "\t -d <dn>           dn: number of allowed doublet genotypes\n";
+    cout << "\t -v                Turn on verbose mode  \n";
+    //cout << "\t -p                Find optimal false positive rate and false negative rate\n";
+    //cout << "\t -l                Find cell tree with branch length (by default, constructed cell trees don't have branch length\n";
+    cout << "\t -n                Only build simple neighbor joining tree (may be useful for very large data)\n";
+    cout << "\t -e                Output mutation tree (may not be binary tree) from called genotypes branch labels.\n";
+    cout << "\t -e0               Output mutation tree but don't output labels (for visualizing large trees).\n";
+    //cout << "\t -s <level>        Use SPR tree search (this will be slower); level: # of SPRs to allow (default is 1)\n";
+    cout << "\t -o <output-file>  Set output file (used for mutation tree output (in GML) format; should have suffix .gml (default: mutation-tree.gml)\n";
+    cout << "\t -t <threshold>    Discard somewhat ambigous genotyeps when constructing intial trees: \n\t\t\t genotypes discarded if the prob. of alternative genotypes is less than <threshold> \n\t\t\t(default is 0, i.e. use all genotypes)\n";
+    cout << "\t -k <number of threads>  Number of threads to use (default 1)\n";
+    exit(1); // never returns to the caller
 }
 
 // settings
@@ -72,282 +64,316 @@ static int numSCs = 0;
 static string strMutTreeOutFile = "mutation-tree.gml";
 static bool fOutPPEdgeLabel = false;
 static bool fOutputLabel = true;
+static int intNumThreads = 1; // thread count set by the -k command-line option (default 1)
 // GLobal variables
 
 // Local functions
-static bool CheckArguments(int argc, char **argv) {
-  if (argc <= 1) {
-    return false;
-  }
-
-  // Check argument one by one
-  // int argpos = 1;
-  for (int i = 1; i < argc; ++i) {
-    if (argv[i][0] == '-' && argv[i][1] == 'l') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      fOptBrLen = true;
-      cout << "Turn on branch optimization. " << endl;
-    } else if (argv[i][0] == '-' && argv[i][1] == 'd') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      ++i;
-      sscanf(argv[i], "%d", &numDoublets);
-      // YW_ASSERT_INFO( i <argc-1, "Check input" );
-      //++i;
-      // float costDoubletThis;
-      // sscanf(argv[i], "%f", &costDoubletThis);
-      // costDoublet = costDoubletThis;
-      // cout << "Setting doublet number to " << numDoublets << ", and doublet
-      // cost to " << costDoublet << endl;
-      cout << "Setting doublet number to " << numDoublets << endl;
-    } else if (argv[i][0] == '-' && argv[i][1] == 'v') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      fVerbose = true;
-      cout << "Turn on verbose mode" << endl;
-    } else if (argv[i][0] == '-' && argv[i][1] == 'n') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      fNJOnly = true;
-      cout << "Only build neighbor joining tree." << endl;
-    } else if (argv[i][0] == '-' && argv[i][1] == 'p') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      fOptParam = true;
-      cout << "Search for optimal genotype error rates" << endl;
-    } else if (argv[i][0] == '-' && argv[i][1] == 'e') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      fOutPPEdgeLabel = true;
-      cout << "Output perfect phylogeny with edge labels" << endl;
-
-      string strOpt = argv[i];
-      if (strOpt.length() >= 3 && strOpt[2] == '0') {
-        cout << "  -- no labels in mutation tree\n";
-        fOutputLabel = false;
-      }
-    } else if (argv[i][0] == '-' && argv[i][1] == 's') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      fSPR = true;
-      ++i;
-      sscanf(argv[i], "%d", &numSPR);
-      cout << "Use SPR tree search: level set to " << numSPR << endl;
-    } else if (argv[i][0] == '-' && argv[i][1] == 't') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      ++i;
-      float thresUse = 0.0;
-      sscanf(argv[i], "%f", &thresUse);
-      thresProbSignificance = thresUse;
-      cout << "Threshold for probability significance: set to "
-           << thresProbSignificance << endl;
-    } else if (argv[i][0] == '-' && argv[i][1] == 'o') {
-      YW_ASSERT_INFO(i < argc - 1, "Check input");
-      ++i;
-      strMutTreeOutFile = argv[i];
-      cout << "Use mutation tree file name to " << strMutTreeOutFile << endl;
-    }
-
-    else if (argv[i][0] != '-') {
-      // not an option one. Right now the only one is file
-      fileInArgIndex = i;
-      // filenameGMLPrefix = argv[i];
-    } else {
-      return false;
+static bool CheckArguments(int argc, char **argv) // parses argv into the file-level settings; false when no arguments are given or an unknown '-' option is seen
+{
+    if (argc <= 1)
+    {
+        return false;
     }
-  }
 
-  return true;
-}
-
-// input handling
-static ScistGenGenotypeMat *ReadsInput(const char *filename) {
-  //
-  ifstream inFile(filename);
-  if (!inFile) {
-    cout << "Can not open " << filename << endl;
-    YW_ASSERT_INFO(false, "Stop");
-  }
-  ScistGenGenotypeMat *pMatIn = NULL;
-  while (inFile.eof() == false) {
-    const int BUF_SZ = 102400;
-    char buffer[BUF_SZ];
-    inFile.getline(buffer, BUF_SZ);
-    if (strlen(buffer) > 0) {
-      // cout << "read one line: " << buffer << endl;
-      // now try to read alleles
-      std::istringstream is(buffer);
-
-      // looking for keyword
-      string strKey;
-      is >> strKey;
-      if (strKey == "HAPLOTYPES" || strKey == "HAPLOID") {
-        is >> numSites >> numSCs;
-        // cout << "numSites: " << numSites << ", numSCs: " << numSCs << endl;
-        YW_ASSERT_INFO(numSites > 0 && numSCs > 0,
-                       "Site and single cells numbers: Cannot be zeros");
-
-        // read in names if specified
-        while (is.eof() == false) {
-          string strName;
-          is >> strName;
-          if (strName.length() > 0) {
-            listCellNames.push_back(strName);
-            // cout << "One lineage name: " << strName << endl;
-          }
-          if ((int)listCellNames.size() > numSCs) {
-            break;
-          }
+    // Check argument one by one; an option is recognised by the single character that follows '-'
+    //int argpos = 1;
+    for (int i = 1; i < argc; ++i)
+    {
+        if (argv[i][0] == '-' && argv[i][1] == 'l')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            fOptBrLen = true;
+            cout << "Turn on branch optimization. " << endl;
         }
-        // if (listCellNames.size() > 0 && (int)listCellNames.size() != numSCs) {
-        //   YW_ASSERT_INFO(
-        //       false, "Fatal error: you must provide names for each lineage");
-        // }
-        bool fSiteName = false;
-        if (listCellNames.size() > 0) {
-          fSiteName = true;
-          ;
+        else if (argv[i][0] == '-' && argv[i][1] == 'd')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            ++i;
+            sscanf(argv[i], "%d", &numDoublets);
+            //YW_ASSERT_INFO( i <argc-1, "Check input" );
+            //++i;
+            //float costDoubletThis;
+            //sscanf(argv[i], "%f", &costDoubletThis);
+            //costDoublet = costDoubletThis;
+            //cout << "Setting doublet number to " << numDoublets << ", and doublet cost to " << costDoublet << endl;
+            cout << "Setting doublet number to " << numDoublets << endl;
         }
-
-        pMatIn = new ScistHaplotypeMat;
-        for (int i = 0; i < (int)listCellNames.size(); ++i) {
-          pMatIn->AddGenotypeName(listCellNames[i]);
+        else if (argv[i][0] == '-' && argv[i][1] == 'v')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            fVerbose = true;
+            cout << "Turn on verbose mode" << endl;
         }
-
-        pMatIn->ReadFromFile(inFile, numSites, numSCs, fSiteName);
-
-#if 0
-if( fSiteName )
-{
-cout << "List of site names: ";
-for(int i=0; i<numSites; ++i)
-{
-cout << pMatIn->GetSiteName(i) << " ";
-}
-cout << endl;
-}
-#endif
-
-        break;
-      } else if (strKey == "TERNARY") {
-        is >> numSites >> numSCs;
-        // cout << "numSites: " << numSites << ", numSCs: " << numSCs << endl;
-        YW_ASSERT_INFO(numSites > 0 && numSCs > 0,
-                       "Site and single cells numbers: Cannot be zeros");
-
-        // read in names if specified
-        while (is.eof() == false) {
-          string strName;
-          is >> strName;
-          if (strName.length() > 0) {
-            listCellNames.push_back(strName);
-            // cout << "One lineage name: " << strName << endl;
-          }
-          if ((int)listCellNames.size() > numSCs) {
-            break;
-          }
+        else if (argv[i][0] == '-' && argv[i][1] == 'n')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            fNJOnly = true;
+            cout << "Only build neighbor joining tree." << endl;
+        }
+        else if (argv[i][0] == '-' && argv[i][1] == 'p')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            fOptParam = true;
+            cout << "Search for optimal genotype error rates" << endl;
+        }
+        else if (argv[i][0] == '-' && argv[i][1] == 'e')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            fOutPPEdgeLabel = true;
+            cout << "Output perfect phylogeny with edge labels" << endl;
+
+            string strOpt = argv[i];
+            if (strOpt.length() >= 3 && strOpt[2] == '0')
+            {
+                cout << "  -- no labels in mutation tree\n";
+                fOutputLabel = false;
+            }
+        }
+        else if (argv[i][0] == '-' && argv[i][1] == 's')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            fSPR = true;
+            ++i;
+            sscanf(argv[i], "%d", &numSPR);
+            cout << "Use SPR tree search: level set to " << numSPR << endl;
+        }
+        else if (argv[i][0] == '-' && argv[i][1] == 't')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            ++i;
+            float thresUse = 0.0;
+            sscanf(argv[i], "%f", &thresUse);
+            thresProbSignificance = thresUse;
+            cout << "Threshold for probability significance: set to " << thresProbSignificance << endl;
+        }
+        else if (argv[i][0] == '-' && argv[i][1] == 'o')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            ++i;
+            strMutTreeOutFile = argv[i];
+            cout << "Use mutation tree file name to " << strMutTreeOutFile << endl;
         }
-        // if (listCellNames.size() > 0 && (int)listCellNames.size() != numSCs) {
-        //   YW_ASSERT_INFO(
-        //       false, "Fatal error: you must provide names for each lineage");
-        // }
-        bool fSiteName = false;
-        if (listCellNames.size() > 0) {
-          fSiteName = true;
-          ;
+        else if (argv[i][0] == '-' && argv[i][1] == 'k')
+        {
+            YW_ASSERT_INFO(i < argc - 1, "Check input");
+            const int numParsed = sscanf(argv[++i], "%d", &intNumThreads); // sscanf like the other numeric options, so bad input is reported below rather than thrown
+            YW_ASSERT_INFO(numParsed == 1 && intNumThreads >= 1, "Number of threads (-k) must be a positive integer");
+            cout << "Use " << intNumThreads << " processing threads" << endl;
         }
 
-        pMatIn = new ScistTernaryMat;
-        for (int i = 0; i < (int)listCellNames.size(); ++i) {
-          pMatIn->AddGenotypeName(listCellNames[i]);
+        else if (argv[i][0] != '-')
+        {
+            // not an option one. Right now the only one is file
+            fileInArgIndex = i;
+            //filenameGMLPrefix = argv[i];
         }
+        else
+        {
+            return false;
+        }
+    }
 
-        pMatIn->ReadFromFile(inFile, numSites, numSCs, fSiteName);
+    return true;
+}
 
-        break;
-      }
+// input handling
+static ScistGenGenotypeMat *ReadsInput(const char *filename)
+{
+    //
+    ifstream inFile(filename);
+    if (!inFile)
+    {
+        cout << "Can not open " << filename << endl;
+        YW_ASSERT_INFO(false, "Stop");
     }
-  }
-  pMatIn->SetSignificantThres(thresProbSignificance);
-
-  // initialize cell names to plain 1, 2, ... if not specified
-  if (listCellNames.size() == 0) {
-    YW_ASSERT_INFO(numSCs > 0, "Number of SCs: not intiialized");
-    for (int c = 1; c <= numSCs; ++c) {
-      string str = std::to_string(c);
-      listCellNames.push_back(str);
+    ScistGenGenotypeMat *pMatIn = NULL;
+    while (inFile.eof() == false)
+    {
+        const int BUF_SZ = 102400;
+        char buffer[BUF_SZ];
+        inFile.getline(buffer, BUF_SZ);
+        if (strlen(buffer) > 0)
+        {
+            //cout << "read one line: " << buffer << endl;
+            // now try to read alleles
+            std::istringstream is(buffer);
+
+            // looking for keyword
+            string strKey;
+            is >> strKey;
+            if (strKey == "HAPLOTYPES" || strKey == "HAPLOID")
+            {
+                is >> numSites >> numSCs;
+                //cout << "numSites: " << numSites << ", numSCs: " << numSCs << endl;
+                YW_ASSERT_INFO(numSites > 0 && numSCs > 0, "Site and single cells numbers: Cannot be zeros");
+
+                // read in names if specified
+                while (is.eof() == false)
+                {
+                    string strName;
+                    is >> strName;
+                    if (strName.length() > 0)
+                    {
+                        listCellNames.push_back(strName);
+                        //cout << "One lineage name: " << strName << endl;
+                    }
+                    if ((int)listCellNames.size() > numSCs)
+                    {
+                        break;
+                    }
+                }
+                // if (listCellNames.size() > 0 && (int)listCellNames.size() != numSCs)
+                // {
+                //     YW_ASSERT_INFO(false, "Fatal error: you must provide names for each lineage");
+                // }
+                bool fSiteName = false;
+                if (listCellNames.size() > 0)
+                {
+                    fSiteName = true;
+                }
+
+                pMatIn = new ScistHaplotypeMat;
+                for (int i = 0; i < (int)listCellNames.size(); ++i)
+                {
+                    pMatIn->AddGenotypeName(listCellNames[i]);
+                }
+
+                pMatIn->ReadFromFile(inFile, numSites, numSCs, fSiteName);
+
+                break;
+            }
+            else if (strKey == "TERNARY")
+            {
+                is >> numSites >> numSCs;
+                //cout << "numSites: " << numSites << ", numSCs: " << numSCs << endl;
+                YW_ASSERT_INFO(numSites > 0 && numSCs > 0, "Site and single cells numbers: Cannot be zeros");
+
+                // read in names if specified
+                while (is.eof() == false)
+                {
+                    string strName;
+                    is >> strName;
+                    if (strName.length() > 0)
+                    {
+                        listCellNames.push_back(strName);
+                        //cout << "One lineage name: " << strName << endl;
+                    }
+                    if ((int)listCellNames.size() > numSCs)
+                    {
+                        break;
+                    }
+                }
+                // if (listCellNames.size() > 0 && (int)listCellNames.size() != numSCs)
+                // {
+                //     YW_ASSERT_INFO(false, "Fatal error: you must provide names for each lineage");
+                // }
+                bool fSiteName = false;
+                if (listCellNames.size() > 0)
+                {
+                    fSiteName = true;
+                }
+
+                pMatIn = new ScistTernaryMat;
+                for (int i = 0; i < (int)listCellNames.size(); ++i)
+                {
+                    pMatIn->AddGenotypeName(listCellNames[i]);
+                }
+
+                pMatIn->ReadFromFile(inFile, numSites, numSCs, fSiteName);
+
+                break;
+            }
+        }
     }
-  }
-  pMatIn->GetSiteNamesAll(listSiteNames);
+    pMatIn->SetSignificantThres(thresProbSignificance);
+
+    // initialize cell names to plain 1, 2, ... if not specified
+    if (listCellNames.size() == 0)
+    {
+        YW_ASSERT_INFO(numSCs > 0, "Number of SCs: not intiialized");
+        for (int c = 1; c <= numSCs; ++c)
+        {
+            string str = std::to_string(c);
+            listCellNames.push_back(str);
+        }
+    }
+    pMatIn->GetSiteNamesAll(listSiteNames);
 
-  return pMatIn;
+    return pMatIn;
 }
 
 // test code
-static void TestCode(const char *filename) {
-  //
-
-  ScistGenGenotypeMat *pMatInput = ReadsInput(filename);
-  string filenameUse = filename;
-  pMatInput->SetFileName(filenameUse);
-
-  // cout << "Input genotype matrix:\n";
-  // pMatInput->Dump();
-  // string strNJ2 = pMatInput->ConsNJTree();
-  // cout << "NJ tree: " << strNJ2 << endl;
-  // delete pMatInput;
-  // exit(1);
-
-  if (fOptParam) {
-    cout << "Now searching for optimal genotype error rates...\n";
-    ScistErrRateInf serInf(*pMatInput);
-    serInf.SetVerbose(fVerbose);
-    serInf.Infer();
-  } else {
-    string treeNJ = pMatInput->ConsNJTree();
-    if (fVerbose) {
-      cout << "Neighbor joining tree from noisy genotypes: " << treeNJ << endl;
-    }
-    if (fNJOnly) {
-      delete pMatInput;
-      return;
+static void TestCode(const char *filename)
+{
+    //
+
+    ScistGenGenotypeMat *pMatInput = ReadsInput(filename);
+    string filenameUse = filename;
+    pMatInput->SetFileName(filenameUse);
+
+    if (fOptParam)
+    {
+        cout << "Now searching for optimal genotype error rates...\n";
+        ScistErrRateInf serInf(*pMatInput);
+        serInf.SetVerbose(fVerbose);
+        serInf.Infer();
     }
+    else
+    {
+        cout << "Initializing ConsNJTree()...." << endl;
+        std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
+        std::string treeNJ = pMatInput->ConsNJTree();
+        cout << "...finished" << endl;
+        std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
+        std::cout << "Time elasped: " << std::chrono::duration_cast<std::chrono::seconds>(end - begin).count() << " [seconds]" << std::endl;
+
+        if (fVerbose)
+        {
+            cout << "Neighbor joining tree from noisy genotypes: " << treeNJ << endl;
+        }
+        if (fNJOnly)
+        {
+            delete pMatInput;
+            return;
+        }
+
+        //ScistInfPerfPhyTest();
+        // plain mode if no double is allowed
+        if (numDoublets == 0)
+        {
 
-    // ScistInfPerfPhyTest();
-    // plain mode if no double is allowed
-    if (numDoublets == 0) {
-#if 0
-            ScistFullPerfPhyMLE ppInfHeu(*pMatInput);
+            ScistPerfPhyMLE ppInfHeu(*pMatInput);
+            ppInfHeu.SetBrOpt(fOptBrLen);
             ppInfHeu.SetVerbose(fVerbose);
+            ppInfHeu.SetPPOut(fOutPPEdgeLabel);
+            ppInfHeu.SetPPOutLabel(fOutputLabel);
+            ppInfHeu.SetSPR(fSPR);
+            ppInfHeu.SetSPRNum(numSPR);
+            ppInfHeu.SetCellNames(listCellNames);
+            ppInfHeu.SetSiteNames(listSiteNames);
+            ppInfHeu.SetMutTreeFileName(strMutTreeOutFile);
+            ppInfHeu.SetNumThreads(intNumThreads);
             ppInfHeu.Infer();
-#endif
-
-      ScistPerfPhyMLE ppInfHeu(*pMatInput);
-      ppInfHeu.SetBrOpt(fOptBrLen);
-      ppInfHeu.SetVerbose(fVerbose);
-      ppInfHeu.SetPPOut(fOutPPEdgeLabel);
-      ppInfHeu.SetPPOutLabel(fOutputLabel);
-      ppInfHeu.SetSPR(fSPR);
-      ppInfHeu.SetSPRNum(numSPR);
-      ppInfHeu.SetCellNames(listCellNames);
-      ppInfHeu.SetSiteNames(listSiteNames);
-      ppInfHeu.SetMutTreeFileName(strMutTreeOutFile);
-      ppInfHeu.Infer();
-    } else {
-      // right now only work with haplotype matrix
-      ScistHaplotypeMat *pMatInputUse =
-          dynamic_cast<ScistHaplotypeMat *>(pMatInput);
-      YW_ASSERT_INFO(
-          pMatInputUse != NULL,
-          "At present, doublet feature only works for binary genotype matrix.");
-
-      cout << "SEARCHING FOR DOUBLETS...\n";
-      ScistDoubletSearch sds(*pMatInput, numDoublets);
-      sds.SetVerbose(fVerbose);
-      sds.SetDouletCost(costDoublet);
-      sds.SetMutTreeOut(fOutPPEdgeLabel);
-      sds.SetCellNames(listCellNames);
-      sds.SetSiteNames(listSiteNames);
-      sds.SetMutTreeFileName(strMutTreeOutFile);
-      sds.SearchInc();
+        }
+        else
+        {
+            // right now only work with haplotype matrix
+            ScistHaplotypeMat *pMatInputUse = dynamic_cast<ScistHaplotypeMat *>(pMatInput);
+            YW_ASSERT_INFO(pMatInputUse != NULL, "At present, doublet feature only works for binary genotype matrix.");
+
+            cout << "SEARCHING FOR DOUBLETS...\n";
+            ScistDoubletSearch sds(*pMatInput, numDoublets);
+            sds.SetVerbose(fVerbose);
+            sds.SetDouletCost(costDoublet);
+            sds.SetMutTreeOut(fOutPPEdgeLabel);
+            sds.SetCellNames(listCellNames);
+            sds.SetSiteNames(listSiteNames);
+            sds.SetMutTreeFileName(strMutTreeOutFile);
+            sds.SearchInc();
+        }
     }
-  }
 
-  delete pMatInput;
+    delete pMatInput;
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////
@@ -355,40 +381,43 @@ static void TestCode(const char *filename) {
 const char *CODE_VER_INFO = "*** SCISTREE ver. 1.2.0.6, May 19, 2019 ***";
 
 //******************************************************************
-int main_in_c(int argc, char **argv) {
-  //    int seq = 0x001;
-  //    int seqMut;
-  //    MutateHCSeqAt(seq, seqMut, 4, 2);
-  // cout << "mutated seq = " << seqMut << endl;
-
-  string outputfile = argv[argc - 1];
-  string str2 = "scistree.input";
-  string str3 = "scistree.output";
-  outputfile.replace(outputfile.find(str2), str2.length(), str3);
-
-  ofstream out(outputfile);
-  auto *coutbuf = cout.rdbuf(); // save old buf
-  cout.rdbuf(out.rdbuf());     // redirect cout to out.txt!
-
-  cout << CODE_VER_INFO << endl << endl;
-
-  // first verify usage
-  if (CheckArguments(argc, argv) == false) {
-    Usage();
-  }
+int main_in_c(int argc, char **argv)
+{
+    //    int seq = 0x001;
+    //    int seqMut;
+    //    MutateHCSeqAt(seq, seqMut, 4, 2);
+    // cout << "mutated seq = " << seqMut << endl;
+
+    string outputfile = argv[argc - 1];
+    string str2 = "scistree.input";
+    string str3 = "scistree.output";
+    outputfile.replace(outputfile.find(str2), str2.length(), str3);
+
+    ofstream out(outputfile);
+    auto *coutbuf = cout.rdbuf(); // save old buf
+    cout.rdbuf(out.rdbuf());      // redirect cout to out.txt!
+
+    cout << CODE_VER_INFO << endl
+         << endl;
+
+    // first verify usage
+    if (CheckArguments(argc, argv) == false)
+    {
+        Usage();
+    }
 
-  // cout << "here0\n";
-  long tstart1 = GetCurrentTimeTick();
+    // cout << "here0\n";
+    long tstart1 = GetCurrentTimeTick();
 
-  TestCode(argv[fileInArgIndex]);
+    TestCode(argv[fileInArgIndex]);
 
-  cout << "Elapsed time = " << GetElapseTime(tstart1) << " seconds." << endl;
+    cout << "Elapsed time = " << GetElapseTime(tstart1) << " seconds." << endl;
 
-  // dump out stats
-  // ApproxGTPStats::Instance().DumpStats();
+    // dump out stats
+    // ApproxGTPStats::Instance().DumpStats();
 
-  cout.rdbuf(coutbuf); // reset to standard output again
-  out.close();
+    cout.rdbuf(coutbuf); // reset to standard output again
+    out.close();
 
-  return 0;
+    return 0;
 }
diff --git a/trisicell/tl/solver/_scistree.py b/trisicell/tl/solver/_scistree.py
index 57058f2..479872e 100644
--- a/trisicell/tl/solver/_scistree.py
+++ b/trisicell/tl/solver/_scistree.py
@@ -13,7 +13,7 @@
 # from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
 
 
-def scistree(df_input, alpha, beta, experiment=False):
+def scistree(df_input, alpha, beta, n_threads=1, experiment=False):
     """Solving using ScisTree.
 
     Accurate and efficient cell lineage tree inference from noisy
@@ -30,6 +30,8 @@ def scistree(df_input, alpha, beta, experiment=False):
         False positive error rate.
     beta : :obj:`float`
         False negative error rate.
+    n_threads : :obj:`int`
+        Number of threads.
     experiment : :obj:`bool`, optional
         Is in the experiment mode (the log won't be shown), by default False
 
@@ -41,7 +43,9 @@ def scistree(df_input, alpha, beta, experiment=False):
     """
 
     if not experiment:
-        tsc.logg.info(f"running ScisTree with alpha={alpha}, beta={beta}")
+        tsc.logg.info(
+            f"running ScisTree with alpha={alpha}, beta={beta}, n_threads={n_threads}"
+        )
     tmpdir = tsc.ul.tmpdirsys(suffix=".scistree")
     cells = df_input.index
     snvs = df_input.columns
@@ -66,6 +70,8 @@ def scistree(df_input, alpha, beta, experiment=False):
         "-d",
         "0",
         "-e",
+        "-k",
+        f"{n_threads}",
         "-o",
         f"{tmpdir.name}/scistree.gml",
         f"{tmpdir.name}/scistree.input",
@@ -108,7 +114,7 @@ def scistree(df_input, alpha, beta, experiment=False):
         return df_output, running_time
 
 
-def rscistree(adata, alpha=0, beta=0, mode="haploid"):
+def rscistree(adata, alpha=0, beta=0, n_threads=1, mode="haploid"):
     """Solving using read-count ScisTree.
 
     Accurate and efficient cell lineage tree inference from noisy
@@ -125,6 +131,8 @@ def rscistree(adata, alpha=0, beta=0, mode="haploid"):
         False positive error rate.
     beta : :obj:`float`
         False negative error rate.
+    n_threads : :obj:`int`
+        Number of threads.
     mode : :obj:`str`
         Mode of calculating the probability from read-count.
         In {'haploid', 'ternary'}, by default haploid
@@ -138,7 +146,7 @@ def rscistree(adata, alpha=0, beta=0, mode="haploid"):
         Values inside this matrix show the presence (1) and absence (0).
     """
 
-    tsc.logg.info(f"running rScisTree with mode={mode}")
+    tsc.logg.info(f"running rScisTree with n_threads={n_threads}, mode={mode}")
     tmpdir = tsc.ul.tmpdirsys(suffix=".rscistree", dirname=".")
 
     cells = adata.obs_names
@@ -172,6 +180,8 @@ def rscistree(adata, alpha=0, beta=0, mode="haploid"):
         "-d",
         "0",
         "-e",
+        "-k",
+        f"{n_threads}",
         "-o",
         f"{tmpdir.name}/rscistree.gml",
         f"{tmpdir.name}/rscistree.input",

From 653783c22349fae5b3c6d74a5e651295660f1ff6 Mon Sep 17 00:00:00 2001
From: Farid Rashidi <farid.rsh@gmail.com>
Date: Mon, 22 Nov 2021 21:07:16 -0500
Subject: [PATCH 10/11] release v0.0.20 (#92)

---
 .bumpversion.cfg      | 2 +-
 setup.py              | 2 +-
 trisicell/__init__.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 39ef74c..22f82a2 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.19
+current_version = 0.0.20
 commit = True
 message = [skip ci] {current_version} → {new_version}
 tag = False
diff --git a/setup.py b/setup.py
index daaf0da..a8abe24 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     __author__ = ", ".join(["Farid Rashidi"])
     __maintainer__ = ", ".join(["Farid Rashidi"])
     __email__ = ", ".join(["farid.rsh@gmail.com"])
-    __version__ = "0.0.19"
+    __version__ = "0.0.20"
 
 if platform == "linux" or platform == "linux2":
     os.environ["CC"] = "g++"
diff --git a/trisicell/__init__.py b/trisicell/__init__.py
index 5d559d5..73ffb31 100644
--- a/trisicell/__init__.py
+++ b/trisicell/__init__.py
@@ -7,5 +7,5 @@
 __author__ = ", ".join(["Farid Rashidi"])
 __maintainer__ = ", ".join(["Farid Rashidi"])
 __email__ = ", ".join(["farid.rsh@gmail.com"])
-__version__ = "0.0.19"
+__version__ = "0.0.20"
 __all__ = (datasets, io, logg, pl, pp, settings, tl, ul)

From e99cb3b8bce2afe970d3c07b154de4a15956b3ef Mon Sep 17 00:00:00 2001
From: Farid Rashidi <farid.rsh@gmail.com>
Date: Mon, 22 Nov 2021 21:11:54 -0500
Subject: [PATCH 11/11] refinement (#93)

---
 docs/source/release_notes.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 123b3a8..ce3d847 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -7,6 +7,15 @@ Release Notes
 =============
 
 
+Version 0.0.20 :small:`November 22, 2021`
+-----------------------------------------
+
+This version includes:
+
+    - Add multi-threaded ScisTree.
+    - Update the documentation.
+
+
 Version 0.0.19 :small:`October 18, 2021`
 ----------------------------------------