From 11e02a16725248bca59f429af9f3b37aa3c92dc6 Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Tue, 7 Apr 2026 16:54:10 +0200
Subject: [PATCH 1/9] Added distance matrix to beta diversity output; this is
 the information required to visualise sample clustering, a likely use-case

---
 glycowork/motif/analysis.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/glycowork/motif/analysis.py b/glycowork/motif/analysis.py
index 4c942f77..bbb659bb 100644
--- a/glycowork/motif/analysis.py
+++ b/glycowork/motif/analysis.py
@@ -7,6 +7,9 @@
 import networkx as nx
 import statsmodels.api as sm
 import matplotlib.pyplot as plt
+from numpy import ndarray
+from pandas import DataFrame
+
 plt.rcParams.update({
     'font.size': 11, 'axes.labelsize': 12, 'axes.titlesize': 13,
     'xtick.labelsize': 10, 'ytick.labelsize': 10, 'axes.linewidth': 0.8,
@@ -16,7 +19,7 @@
     'axes.prop_cycle': plt.cycler('color', ['#2D6A9F', '#C84B55', '#3A9268', '#E8863A', '#7B5EA7', '#C4843A', '#4AADA8'])
 })
 from collections import Counter
-from typing import Any
+from typing import Any, List
 from scipy.stats import ttest_ind, ttest_rel, norm, levene, f_oneway, spearmanr
 from statsmodels.formula.api import ols
 from statsmodels.stats.multitest import multipletests
@@ -1015,13 +1018,14 @@ def get_biodiversity(
     gamma: float = 0.1, # Uncertainty parameter for CLR transform
     custom_scale: float | dict = 0, # Ratio of total signal in group2/group1 for an informed scale model (or group_idx: mean(group)/min(mean(groups)) signal dict for multivariate)
     random_state: int | np.random.Generator | None = None # optional random state for reproducibility
-    ) -> pd.DataFrame: # DataFrame with diversity indices and test statistics
+    ) -> list[DataFrame | ndarray[Any, Any] | list[Any]]: # DataFrame with diversity indices and test statistics
   "Calculates alpha (Shannon/Simpson) and beta (ANOSIM/PERMANOVA) diversity measures from glycomics data"
   experiment = "diff" if group2 else "anova"
   df, df_org, group1, group2 = preprocess_data(df, group1, group2, experiment = experiment, motifs = motifs, impute = False,
                                                transform = transform, feature_set = feature_set, paired = paired, gamma = gamma,
                                                custom_scale = custom_scale, custom_motifs = custom_motifs, random_state = random_state)
   shopping_cart = []
+  distance_matrix = []
   group_sizes = group1 if not group2 else len(group1)*[1]+len(group2)*[2]
   group_counts = Counter(group_sizes)
   # Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment
@@ -1066,8 +1070,8 @@ def get_biodiversity(
         bc_diversity[index_1, index_2] = bc_pair
     b_df_out = pd.DataFrame.from_dict(bc_diversity, orient = 'index')
     out_len = int(np.sqrt(len(b_df_out)))
-    b_df_out_values = b_df_out.values.reshape(out_len, out_len)
-    beta_df_out = pd.DataFrame(data = b_df_out_values, index = range(out_len), columns = range(out_len))
+    distance_matrix = b_df_out.values.reshape(out_len, out_len)
+    beta_df_out = pd.DataFrame(data = distance_matrix, index = range(out_len), columns = range(out_len))
     if all(count > 1 for count in group_counts.values()):
       r, p = anosim(beta_df_out, group_sizes, permutations)
       b_test_stats = pd.DataFrame({'Metric': 'Beta diversity (ANOSIM)', 'p-val': p, 'Effect size': r}, index = [0])
@@ -1079,7 +1083,7 @@ def get_biodiversity(
   corrpvals, significance = correct_multiple_testing(df_out['p-val'], alpha)
   df_out["corr p-val"] = corrpvals
   df_out["significant"] = significance
-  return df_out.sort_values(by = 'p-val').sort_values(by = 'corr p-val').reset_index(drop = True)
+  return [df_out.sort_values(by = 'p-val').sort_values(by = 'corr p-val').reset_index(drop = True), distance_matrix]
 
 
 def get_SparCC(

From f5eb35a3cbe42d2cc1428bc5546a00cec6d6ba73 Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Tue, 7 Apr 2026 16:57:42 +0200
Subject: [PATCH 2/9] Changelog update

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f894ecf3..5a98ab75 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,8 @@
 #### analysis
 ##### Fixed 🐛
 - Fixed column names slipping into column values when `motifs = True` combined with `transform = ALR` in `get_pca` (e802da1)
+##### Changed 🔄
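+- `get_biodiversity` now returns the beta diversity distance matrix alongside the results DataFrame (the return value is now a pair instead of a single DataFrame, so existing callers must unpack it)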
 
 #### draw
 ##### Changed 🔄

From b0f72e7e20692e331ee8894f0f2160b6cdc709e7 Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Thu, 9 Apr 2026 17:24:21 +0200
Subject: [PATCH 3/9] Beta diversity update: fixed testing

---
 glycowork/motif/analysis.py  | 9 ++++-----
 tests/test_core_functions.py | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/glycowork/motif/analysis.py b/glycowork/motif/analysis.py
index bbb659bb..f1a10c02 100644
--- a/glycowork/motif/analysis.py
+++ b/glycowork/motif/analysis.py
@@ -8,7 +8,6 @@
 import statsmodels.api as sm
 import matplotlib.pyplot as plt
 from numpy import ndarray
-from pandas import DataFrame
 
 plt.rcParams.update({
     'font.size': 11, 'axes.labelsize': 12, 'axes.titlesize': 13,
@@ -19,7 +18,7 @@
     'axes.prop_cycle': plt.cycler('color', ['#2D6A9F', '#C84B55', '#3A9268', '#E8863A', '#7B5EA7', '#C4843A', '#4AADA8'])
 })
 from collections import Counter
-from typing import Any, List
+from typing import Any
 from scipy.stats import ttest_ind, ttest_rel, norm, levene, f_oneway, spearmanr
 from statsmodels.formula.api import ols
 from statsmodels.stats.multitest import multipletests
@@ -1018,14 +1017,14 @@ def get_biodiversity(
     gamma: float = 0.1, # Uncertainty parameter for CLR transform
     custom_scale: float | dict = 0, # Ratio of total signal in group2/group1 for an informed scale model (or group_idx: mean(group)/min(mean(groups)) signal dict for multivariate)
     random_state: int | np.random.Generator | None = None # optional random state for reproducibility
-    ) -> list[DataFrame | ndarray[Any, Any] | list[Any]]: # DataFrame with diversity indices and test statistics
+    ) -> tuple[pd.DataFrame, pd.DataFrame ]: # First dataFrame with diversity indices and test statistics, second with beta-diversity distance matrix
   "Calculates alpha (Shannon/Simpson) and beta (ANOSIM/PERMANOVA) diversity measures from glycomics data"
   experiment = "diff" if group2 else "anova"
   df, df_org, group1, group2 = preprocess_data(df, group1, group2, experiment = experiment, motifs = motifs, impute = False,
                                                transform = transform, feature_set = feature_set, paired = paired, gamma = gamma,
                                                custom_scale = custom_scale, custom_motifs = custom_motifs, random_state = random_state)
   shopping_cart = []
-  distance_matrix = []
+  distance_matrix = pd.DataFrame()
   group_sizes = group1 if not group2 else len(group1)*[1]+len(group2)*[2]
   group_counts = Counter(group_sizes)
   # Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment
@@ -1083,7 +1082,7 @@ def get_biodiversity(
   corrpvals, significance = correct_multiple_testing(df_out['p-val'], alpha)
   df_out["corr p-val"] = corrpvals
   df_out["significant"] = significance
-  return [df_out.sort_values(by = 'p-val').sort_values(by = 'corr p-val').reset_index(drop = True), distance_matrix]
+  return df_out.sort_values(by = 'p-val').sort_values(by = 'corr p-val').reset_index(drop = True), distance_matrix
 
 
 def get_SparCC(
diff --git a/tests/test_core_functions.py b/tests/test_core_functions.py
index 26b299f5..deb51a1c 100644
--- a/tests/test_core_functions.py
+++ b/tests/test_core_functions.py
@@ -4006,7 +4006,7 @@ def generate_group_data(base_proportions, n_samples=3, noise_scale=0.15):
     # Run biodiversity analysis
     results = get_biodiversity(df, group1, group2, metrics=['alpha', 'beta'])
     # Basic assertions
-    assert isinstance(results, pd.DataFrame), "Results should be a DataFrame"
+    assert isinstance(results, tuple(pd.DataFrame, pd.DataFrame)), "Results should be a tuple of two DataFrames"
     assert 'Metric' in results.columns, "Results should have a Metric column"
     assert 'p-val' in results.columns, "Results should have a p-val column"
     # Additional assertions to verify realistic results

From 956111f20d24e089977c4b5cc0e561038a1ff6cf Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Fri, 10 Apr 2026 14:30:34 +0200
Subject: [PATCH 4/9] Beta diversity update: fixed testing

---
 glycowork/motif/analysis.py  | 2 +-
 tests/test_core_functions.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/glycowork/motif/analysis.py b/glycowork/motif/analysis.py
index f1a10c02..ca930553 100644
--- a/glycowork/motif/analysis.py
+++ b/glycowork/motif/analysis.py
@@ -1017,7 +1017,7 @@ def get_biodiversity(
     gamma: float = 0.1, # Uncertainty parameter for CLR transform
     custom_scale: float | dict = 0, # Ratio of total signal in group2/group1 for an informed scale model (or group_idx: mean(group)/min(mean(groups)) signal dict for multivariate)
     random_state: int | np.random.Generator | None = None # optional random state for reproducibility
-    ) -> tuple[pd.DataFrame, pd.DataFrame ]: # First dataFrame with diversity indices and test statistics, second with beta-diversity distance matrix
+    ) -> tuple(pd.DataFrame, pd.DataFrame): # First dataFrame with diversity indices and test statistics, second with beta-diversity distance matrix
   "Calculates alpha (Shannon/Simpson) and beta (ANOSIM/PERMANOVA) diversity measures from glycomics data"
   experiment = "diff" if group2 else "anova"
   df, df_org, group1, group2 = preprocess_data(df, group1, group2, experiment = experiment, motifs = motifs, impute = False,
diff --git a/tests/test_core_functions.py b/tests/test_core_functions.py
index deb51a1c..af7b803f 100644
--- a/tests/test_core_functions.py
+++ b/tests/test_core_functions.py
@@ -4006,7 +4006,8 @@ def generate_group_data(base_proportions, n_samples=3, noise_scale=0.15):
     # Run biodiversity analysis
     results = get_biodiversity(df, group1, group2, metrics=['alpha', 'beta'])
     # Basic assertions
-    assert isinstance(results, tuple(pd.DataFrame, pd.DataFrame)), "Results should be a tuple of two DataFrames"
+    assert isinstance(results, tuple), "Results should be a tuple"
+    assert len(results) ==2, "Results should be consist of two DataFrames"
     assert 'Metric' in results.columns, "Results should have a Metric column"
     assert 'p-val' in results.columns, "Results should have a p-val column"
     # Additional assertions to verify realistic results

From ad028875e0b9af8a62b398f6ff2746ade8306d35 Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Fri, 10 Apr 2026 14:37:33 +0200
Subject: [PATCH 5/9] Beta diversity update: fixed testing

---
 glycowork/motif/analysis.py  | 2 +-
 tests/test_core_functions.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/glycowork/motif/analysis.py b/glycowork/motif/analysis.py
index ca930553..6287a1d9 100644
--- a/glycowork/motif/analysis.py
+++ b/glycowork/motif/analysis.py
@@ -1017,7 +1017,7 @@ def get_biodiversity(
     gamma: float = 0.1, # Uncertainty parameter for CLR transform
     custom_scale: float | dict = 0, # Ratio of total signal in group2/group1 for an informed scale model (or group_idx: mean(group)/min(mean(groups)) signal dict for multivariate)
     random_state: int | np.random.Generator | None = None # optional random state for reproducibility
-    ) -> tuple(pd.DataFrame, pd.DataFrame): # First dataFrame with diversity indices and test statistics, second with beta-diversity distance matrix
+    ) -> tuple(): # First dataFrame with diversity indices and test statistics, second with beta-diversity distance matrix
   "Calculates alpha (Shannon/Simpson) and beta (ANOSIM/PERMANOVA) diversity measures from glycomics data"
   experiment = "diff" if group2 else "anova"
   df, df_org, group1, group2 = preprocess_data(df, group1, group2, experiment = experiment, motifs = motifs, impute = False,
diff --git a/tests/test_core_functions.py b/tests/test_core_functions.py
index af7b803f..fab7a9c4 100644
--- a/tests/test_core_functions.py
+++ b/tests/test_core_functions.py
@@ -4007,7 +4007,7 @@ def generate_group_data(base_proportions, n_samples=3, noise_scale=0.15):
     results = get_biodiversity(df, group1, group2, metrics=['alpha', 'beta'])
     # Basic assertions
     assert isinstance(results, tuple), "Results should be a tuple"
-    assert len(results) ==2, "Results should be consist of two DataFrames"
+    assert len(results) == 2, "Results should be consist of two DataFrames"
     assert 'Metric' in results.columns, "Results should have a Metric column"
     assert 'p-val' in results.columns, "Results should have a p-val column"
     # Additional assertions to verify realistic results

From 888852e7eb289065bf783e43f00763ad48671fac Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Fri, 10 Apr 2026 15:26:57 +0200
Subject: [PATCH 6/9] Beta diversity update: fixed testing

---
 tests/test_core_functions.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_core_functions.py b/tests/test_core_functions.py
index fab7a9c4..3892ee17 100644
--- a/tests/test_core_functions.py
+++ b/tests/test_core_functions.py
@@ -4008,7 +4008,10 @@ def generate_group_data(base_proportions, n_samples=3, noise_scale=0.15):
     # Basic assertions
     assert isinstance(results, tuple), "Results should be a tuple"
     assert len(results) == 2, "Results should be consist of two DataFrames"
-    assert 'Metric' in results.columns, "Results should have a Metric column"
+    stats, dist_matrix = results
+    assert isinstance(stats, pd.DataFrame)
+    assert isinstance(dist_matrix, pd.DataFrame)
+    assert 'Metric' in stats.columns, "Stats results should have a Metric column"
     assert 'p-val' in results.columns, "Results should have a p-val column"
     # Additional assertions to verify realistic results
     assert len(results) >= 2, "Should have at least alpha and beta diversity results"

From 3988ae127bca5c7f652f27e1563fe7f160efd36d Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Fri, 10 Apr 2026 15:43:52 +0200
Subject: [PATCH 7/9] Beta diversity update: fixed testing

---
 tests/test_core_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_core_functions.py b/tests/test_core_functions.py
index 3892ee17..690aed50 100644
--- a/tests/test_core_functions.py
+++ b/tests/test_core_functions.py
@@ -4010,7 +4010,7 @@ def generate_group_data(base_proportions, n_samples=3, noise_scale=0.15):
     assert len(results) == 2, "Results should be consist of two DataFrames"
     stats, dist_matrix = results
     assert isinstance(stats, pd.DataFrame)
-    assert isinstance(dist_matrix, pd.DataFrame)
+    assert isinstance(dist_matrix, np.ndarray)
     assert 'Metric' in stats.columns, "Stats results should have a Metric column"
     assert 'p-val' in results.columns, "Results should have a p-val column"
     # Additional assertions to verify realistic results

From e2f43f4c2a5909002ab4511f0fae6b6ca52be405 Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Fri, 10 Apr 2026 15:52:55 +0200
Subject: [PATCH 8/9] Beta diversity update: fixed testing

---
 tests/test_core_functions.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_core_functions.py b/tests/test_core_functions.py
index 690aed50..7a1749e2 100644
--- a/tests/test_core_functions.py
+++ b/tests/test_core_functions.py
@@ -4012,13 +4012,13 @@ def generate_group_data(base_proportions, n_samples=3, noise_scale=0.15):
     assert isinstance(stats, pd.DataFrame)
     assert isinstance(dist_matrix, np.ndarray)
     assert 'Metric' in stats.columns, "Stats results should have a Metric column"
-    assert 'p-val' in results.columns, "Results should have a p-val column"
+    assert 'p-val' in stats.columns, "Results should have a p-val column"
     # Additional assertions to verify realistic results
-    assert len(results) >= 2, "Should have at least alpha and beta diversity results"
-    assert all(0 <= p <= 1 for p in results['p-val']), "p-values should be between 0 and 1"
+    assert len(stats) >= 2, "Should have at least alpha and beta diversity results"
+    assert all(0 <= p <= 1 for p in stats['p-val']), "p-values should be between 0 and 1"
     # Optional: Check if the differences are detectable
     # The groups are designed to be different, so p-values should be < 0.05
-    assert any(p < 0.05 for p in results['p-val']), "Should detect differences between groups"
+    assert any(p < 0.05 for p in stats['p-val']), "Should detect differences between groups"
     results = get_biodiversity(df, group1, group2, metrics=['alpha', 'beta'], paired=True)
     df3 = df[['glycan'] + group1 + group2].copy()
     for i in range(3):

From 34c1666b52b840b2541a263429fa9ed29c8cedc9 Mon Sep 17 00:00:00 2001
From: AlexBennett <a.rw.bennett@gmail.com>
Date: Fri, 10 Apr 2026 16:02:13 +0200
Subject: [PATCH 9/9] Beta diversity update: fixed testing

---
 tests/test_core_functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_core_functions.py b/tests/test_core_functions.py
index 7a1749e2..adfdba20 100644
--- a/tests/test_core_functions.py
+++ b/tests/test_core_functions.py
@@ -4024,9 +4024,9 @@ def generate_group_data(base_proportions, n_samples=3, noise_scale=0.15):
     for i in range(3):
         df3[f'sample3_{i + 1}'] = group2_data[i] * 1.2
     results = get_biodiversity(df3, [1, 1, 1, 2, 2, 2, 3, 3, 3], [], metrics = ['alpha'])
-    assert isinstance(results, pd.DataFrame)
+    assert isinstance(results, tuple)
     results = get_biodiversity(df, group1, group2, metrics = ['beta'], motifs = True)
-    assert isinstance(results, pd.DataFrame)
+    assert isinstance(results, tuple)
 
 
 @pytest.fixture