Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#### analysis
##### Fixed 🐛
- Fixed column names slipping into column values when `motifs = True` combined with `transform = ALR` in `get_pca` (e802da1)
##### Changed 🔄
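- `get_biodiversity` now returns the beta-diversity distance matrix in addition to the results table (the return value is now a tuple instead of a single DataFrame)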

#### draw
##### Changed 🔄
Expand Down
11 changes: 7 additions & 4 deletions glycowork/motif/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import networkx as nx
import statsmodels.api as sm
import matplotlib.pyplot as plt
from numpy import ndarray

Check warning on line 10 in glycowork/motif/analysis.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

glycowork/motif/analysis.py#L10

'numpy.ndarray' imported but unused (F401)

Check warning on line 10 in glycowork/motif/analysis.py

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

glycowork/motif/analysis.py#L10

Unused ndarray imported from numpy

plt.rcParams.update({
'font.size': 11, 'axes.labelsize': 12, 'axes.titlesize': 13,
'xtick.labelsize': 10, 'ytick.labelsize': 10, 'axes.linewidth': 0.8,
Expand Down Expand Up @@ -1015,13 +1017,14 @@
gamma: float = 0.1, # Uncertainty parameter for CLR transform
custom_scale: float | dict = 0, # Ratio of total signal in group2/group1 for an informed scale model (or group_idx: mean(group)/min(mean(groups)) signal dict for multivariate)
random_state: int | np.random.Generator | None = None # optional random state for reproducibility
) -> pd.DataFrame: # DataFrame with diversity indices and test statistics
) -> tuple[pd.DataFrame, pd.DataFrame ]: # First DataFrame holds diversity indices and test statistics; second holds the beta-diversity distance matrix
"Calculates alpha (Shannon/Simpson) and beta (ANOSIM/PERMANOVA) diversity measures from glycomics data"
experiment = "diff" if group2 else "anova"
df, df_org, group1, group2 = preprocess_data(df, group1, group2, experiment = experiment, motifs = motifs, impute = False,
transform = transform, feature_set = feature_set, paired = paired, gamma = gamma,
custom_scale = custom_scale, custom_motifs = custom_motifs, random_state = random_state)
shopping_cart = []
distance_matrix = pd.DataFrame()
group_sizes = group1 if not group2 else len(group1)*[1]+len(group2)*[2]
group_counts = Counter(group_sizes)
# Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment
Expand Down Expand Up @@ -1066,8 +1069,8 @@
bc_diversity[index_1, index_2] = bc_pair
b_df_out = pd.DataFrame.from_dict(bc_diversity, orient = 'index')
out_len = int(np.sqrt(len(b_df_out)))
b_df_out_values = b_df_out.values.reshape(out_len, out_len)
beta_df_out = pd.DataFrame(data = b_df_out_values, index = range(out_len), columns = range(out_len))
distance_matrix = b_df_out.values.reshape(out_len, out_len)
beta_df_out = pd.DataFrame(data = distance_matrix, index = range(out_len), columns = range(out_len))
if all(count > 1 for count in group_counts.values()):
r, p = anosim(beta_df_out, group_sizes, permutations)
b_test_stats = pd.DataFrame({'Metric': 'Beta diversity (ANOSIM)', 'p-val': p, 'Effect size': r}, index = [0])
Expand All @@ -1079,7 +1082,7 @@
corrpvals, significance = correct_multiple_testing(df_out['p-val'], alpha)
df_out["corr p-val"] = corrpvals
df_out["significant"] = significance
return df_out.sort_values(by = 'p-val').sort_values(by = 'corr p-val').reset_index(drop = True)
return df_out.sort_values(by = 'p-val').sort_values(by = 'corr p-val').reset_index(drop = True), distance_matrix


def get_SparCC(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_core_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4006,7 +4006,7 @@ def generate_group_data(base_proportions, n_samples=3, noise_scale=0.15):
# Run biodiversity analysis
results = get_biodiversity(df, group1, group2, metrics=['alpha', 'beta'])
# Basic assertions
assert isinstance(results, pd.DataFrame), "Results should be a DataFrame"
assert isinstance(results, tuple(pd.DataFrame, pd.DataFrame)), "Results should be a tuple of two DataFrames"
assert 'Metric' in results.columns, "Results should have a Metric column"
assert 'p-val' in results.columns, "Results should have a p-val column"
# Additional assertions to verify realistic results
Expand Down
Loading