Skip to content
Draft
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
73e13a2
added sample size calculation in methods\data_analysis.py and forms\d…
selenabr Jun 5, 2024
f133c87
enabled possibility to choose one protein for calculation dependent o…
selenabr Jun 17, 2024
49c7f0e
fixed errors with missing inputs
selenabr Jun 18, 2024
6d8c9a8
added variance calculation and testing function and edited sample siz…
selenabr Jun 18, 2024
0b95cf0
fixed some errors
selenabr Jun 19, 2024
22c293d
output field for result
selenabr Jun 20, 2024
fd756df
Merge branch 'dev' into bachelor-thesis-selena
selenabr Jun 20, 2024
b22b6e7
further implementation of output field for result
selenabr Jun 21, 2024
c6a2f3b
display display_output in output field
selenabr Jun 23, 2024
032286c
display_output field displayed in the same size and position as the o…
selenabr Jun 25, 2024
e90fab3
test function for sample_size_calculation
selenabr Jun 25, 2024
01eba42
Merge branch 'dev' into bachelor-thesis-selena
selenabr Jun 25, 2024
d3cf9d8
edited description of function
selenabr Jun 26, 2024
3ce4ae1
check if implemented function of Paper (Cairns et al., 2009) and libr…
selenabr Jul 8, 2024
f78b0b9
power calculation and test of library-function and implemented paper-…
selenabr Jul 8, 2024
e3dd1c3
added test for power_calculation method
selenabr Aug 20, 2024
2e3de5a
fixed constructor error
selenabr Aug 21, 2024
a46a074
sample size calculation for different group sizes (Cohen 1988) and mo…
selenabr Aug 23, 2024
3446be3
code formatting, resolved comments (output not a float, significant_p…
selenabr Aug 26, 2024
cb25777
feature: user can choose whether metadata contains a column for indiv…
selenabr Aug 28, 2024
52ef105
adapted test for power_calculation and sample_size_calculation and ch…
selenabr Aug 28, 2024
ac9e783
added function that calculates sample size for all proteins and shows…
selenabr Sep 3, 2024
e54c767
formatting
selenabr Sep 3, 2024
2faa972
commented the dataframe-output-stuff out, otherwise violin plot could…
selenabr Sep 3, 2024
25cf2b2
changed color of violinplot and added axis-description
selenabr Sep 3, 2024
ae4e8cb
changed color of violinplot and removed axis-description
selenabr Sep 5, 2024
5c63008
resolved comments
selenabr Sep 5, 2024
0adc15c
Added function to get dataframes with sample size column as output
selenabr Sep 5, 2024
dcba877
Added power_calculation_for_all_proteins to calculate minimum power f…
selenabr Sep 6, 2024
eb32984
Fixed hover display of violin plots
selenabr Sep 8, 2024
1adda1b
fixed typo and removed unnecessary comment
selenabr Oct 8, 2024
d0ec174
calculations for thesis (should be removed before merging into dev)
selenabr Oct 29, 2024
776dc55
calculations for thesis (should be removed before merging into dev)
selenabr Nov 4, 2024
7131d3b
put calculation for thesis into comment and changed description of me…
selenabr Nov 18, 2024
6e2daa3
Add files via upload
selenabr Nov 18, 2024
6778796
Merge branch 'dev' into bachelor-thesis-selena
selenabr Mar 4, 2025
01e9d5f
merge bachelor-thesis-selena into dev
selenabr Mar 4, 2025
412dfd1
fixed error in power_analysis.py (constants.color) and commented file…
selenabr Mar 4, 2025
7b6c159
changed steps to new format
Jonas0000 Mar 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 46 additions & 25 deletions protzilla/data_analysis/differential_expression_mann_whitney.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,22 @@
import pandas as pd
from scipy import stats

from protzilla.data_analysis.differential_expression_helper import _map_log_base, apply_multiple_testing_correction
from protzilla.data_analysis.differential_expression_helper import (
_map_log_base,
apply_multiple_testing_correction,
)
from protzilla.utilities.transform_dfs import long_to_wide


def mann_whitney_test_on_intensity_data(
intensity_df: pd.DataFrame,
metadata_df: pd.DataFrame,
grouping: str,
group1: str,
group2: str,
log_base: str = None,
alpha=0.05,
multiple_testing_correction_method: str = "",
intensity_df: pd.DataFrame,
metadata_df: pd.DataFrame,
grouping: str,
group1: str,
group2: str,
log_base: str = None,
alpha=0.05,
multiple_testing_correction_method: str = "",
) -> dict:
wide_df = long_to_wide(intensity_df)

Expand All @@ -31,13 +34,24 @@ def mann_whitney_test_on_intensity_data(
multiple_testing_correction_method=multiple_testing_correction_method,
columns_name="Protein ID",
)
differentially_expressed_proteins_df = pd.merge(intensity_df, outputs["differential_expressed_columns_df"], on="Protein ID", how="left")
differentially_expressed_proteins_df = pd.merge(
intensity_df,
outputs["differential_expressed_columns_df"],
on="Protein ID",
how="left",
)
differentially_expressed_proteins_df = differentially_expressed_proteins_df.loc[
differentially_expressed_proteins_df["Protein ID"].isin(outputs["differential_expressed_columns_df"]["Protein ID"])
differentially_expressed_proteins_df["Protein ID"].isin(
outputs["differential_expressed_columns_df"]["Protein ID"]
)
]
significant_proteins_df = pd.merge(intensity_df, outputs["significant_columns_df"], on="Protein ID", how="left")
significant_proteins_df = pd.merge(
intensity_df, outputs["significant_columns_df"], on="Protein ID", how="left"
)
significant_proteins_df = significant_proteins_df.loc[
significant_proteins_df["Protein ID"].isin(outputs["significant_columns_df"]["Protein ID"])
significant_proteins_df["Protein ID"].isin(
outputs["significant_columns_df"]["Protein ID"]
)
]

return dict(
Expand All @@ -50,16 +64,17 @@ def mann_whitney_test_on_intensity_data(
messages=outputs["messages"],
)


def mann_whitney_test_on_columns(
df: pd.DataFrame,
metadata_df: pd.DataFrame,
grouping: str,
group1: str,
group2: str,
log_base: str = None,
alpha=0.05,
multiple_testing_correction_method: str = "",
columns_name: str = "Protein ID",
df: pd.DataFrame,
metadata_df: pd.DataFrame,
grouping: str,
group1: str,
group2: str,
log_base: str = None,
alpha=0.05,
multiple_testing_correction_method: str = "",
columns_name: str = "Protein ID",
) -> dict:
"""
Perform Mann-Whitney U test on all columns of the data frame.
Expand Down Expand Up @@ -104,7 +119,9 @@ def mann_whitney_test_on_columns(
for column in data_columns:
group1_data = df_with_groups[df_with_groups[grouping] == group1][column]
group2_data = df_with_groups[df_with_groups[grouping] == group2][column]
u_statistic, p_value = stats.mannwhitneyu(group1_data, group2_data, alternative="two-sided")
u_statistic, p_value = stats.mannwhitneyu(
group1_data, group2_data, alternative="two-sided"
)

if not np.isnan(p_value):
log2_fold_change = (
Expand Down Expand Up @@ -149,9 +166,13 @@ def mann_whitney_test_on_columns(

significant_columns_df = combined_df[
combined_df["corrected_p_value"] <= corrected_alpha
]
]

messages = [dict(level=logging.INFO, msg=f"Invalid columns: {invalid_columns}")] if invalid_columns else []
messages = (
[dict(level=logging.INFO, msg=f"Invalid columns: {invalid_columns}")]
if invalid_columns
else []
)

return dict(
differential_expressed_columns_df=combined_df,
Expand Down
Loading