Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions protzilla/data_preprocessing/imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ def by_knn_plot(
graph_type_quantities,
group_by,
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
Expand All @@ -367,6 +368,7 @@ def by_knn_plot(
graph_type_quantities,
group_by,
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -377,6 +379,7 @@ def by_normal_distribution_sampling_plot(
graph_type_quantities,
group_by,
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
Expand All @@ -385,6 +388,7 @@ def by_normal_distribution_sampling_plot(
graph_type_quantities,
group_by,
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -395,6 +399,7 @@ def by_simple_imputer_plot(
graph_type_quantities,
group_by,
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
Expand All @@ -403,6 +408,7 @@ def by_simple_imputer_plot(
graph_type_quantities,
group_by,
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -413,6 +419,7 @@ def by_min_per_sample_plot(
graph_type_quantities,
group_by,
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
Expand All @@ -421,6 +428,7 @@ def by_min_per_sample_plot(
graph_type_quantities,
group_by,
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -431,6 +439,7 @@ def by_min_per_protein_plot(
graph_type_quantities,
group_by,
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
Expand All @@ -439,6 +448,7 @@ def by_min_per_protein_plot(
graph_type_quantities,
group_by,
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -449,6 +459,7 @@ def by_min_per_dataset_plot(
graph_type_quantities,
group_by,
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
Expand All @@ -457,6 +468,7 @@ def by_min_per_dataset_plot(
graph_type_quantities,
group_by,
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -471,6 +483,7 @@ def _build_box_hist_plot(
graph_type_quantities: str = "Pie chart",
group_by: str = "None",
visual_transformation: str = "linear",
proteins_of_interest= None,
) -> list[Figure]:
"""
This function creates two visualisations:
Expand Down Expand Up @@ -506,6 +519,7 @@ def _build_box_hist_plot(
heading="Distribution of Protein Intensities",
group_by=group_by,
visual_transformation=visual_transformation,
proteins_of_interest=proteins_of_interest,
y_title="Intensity",
)
elif graph_type == "Histogram":
Expand Down
27 changes: 18 additions & 9 deletions protzilla/data_preprocessing/normalisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,16 @@ def by_z_score_plot(
method_outputs,
graph_type,
group_by,
visual_transformation
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
method_outputs["protein_df"],
graph_type,
group_by,
visual_transformation
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -249,13 +251,15 @@ def by_median_plot(
method_outputs,
graph_type,
group_by,
visual_transformation
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
method_outputs["protein_df"],
graph_type, group_by,
visual_transformation
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -264,13 +268,15 @@ def by_totalsum_plot(
method_outputs,
graph_type,
group_by,
visual_transformation
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
method_outputs["protein_df"],
graph_type, group_by,
visual_transformation
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


Expand All @@ -279,18 +285,20 @@ def by_reference_protein_plot(
method_outputs,
graph_type,
group_by,
visual_transformation
visual_transformation,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"],
method_outputs["protein_df"],
graph_type,
group_by,
visual_transformation
visual_transformation,
[] if proteins_of_interest is None else proteins_of_interest,
)


def _build_box_hist_plot(df, result_df, graph_type, group_by, visual_transformation):
def _build_box_hist_plot(df, result_df, graph_type, group_by, visual_transformation, proteins_of_interest=None):
if graph_type == "Boxplot":
fig = create_box_plots(
dataframe_a=df,
Expand All @@ -302,6 +310,7 @@ def _build_box_hist_plot(df, result_df, graph_type, group_by, visual_transformat
y_title="Intensity",
group_by=group_by,
visual_transformation=visual_transformation,
proteins_of_interest=proteins_of_interest,
)
if graph_type == "Histogram":
fig = create_histograms(
Expand Down
33 changes: 23 additions & 10 deletions protzilla/data_preprocessing/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def create_bar_plot(
names_of_sectors: "list[str]",
values_of_sectors: "list[int]",
heading: str = "",
colour: "list[str]" = PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE,
color: "list[str]" = PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE,
y_title: str = "",
x_title: str = "",
) -> Figure:
Expand All @@ -76,7 +76,7 @@ def create_bar_plot(
fig = px.bar(
x=names_of_sectors,
y=values_of_sectors,
color=colour[: len(values_of_sectors)],
color=color[: len(values_of_sectors)],
color_discrete_map="identity",
)

Expand Down Expand Up @@ -110,6 +110,7 @@ def create_box_plots(
y_title: str = "",
x_title: str = "",
group_by: str = "None",
proteins_of_interest=None,
visual_transformation: str = "linear",
) -> Figure:
"""
Expand All @@ -129,6 +130,7 @@ def create_box_plots(
:param y_title: Optional y-axis title for graphs.
:param x_title: Optional x-axis title for graphs.
:param proteins_of_interest: Protein IDs to include in the boxplot; an empty list includes every protein.
:param visual_transformation: Visual transformation of the y-axis data.
:param group_by: Optional argument to create a grouped boxplot\
graph. Arguments can be either "Sample" to group by sample or\
"Protein ID" to group by protein. Leave "None" to get ungrouped\
Expand All @@ -141,19 +143,29 @@ def create_box_plots(
f"""Group_by parameter must be "None" or
"Sample" or "Protein ID" but is {group_by}"""
)
intensity_name_a = default_intensity_column(dataframe_a)
intensity_name_b = default_intensity_column(dataframe_b)
if len(proteins_of_interest) == 0:
proteins_of_interest = dataframe_a["Protein ID"]
elif isinstance(proteins_of_interest, str):
proteins_of_interest = [proteins_of_interest]

filtered_df = dataframe_a[dataframe_a["Protein ID"].isin(proteins_of_interest)]
filtered_result_df = dataframe_b[
dataframe_b["Protein ID"].isin(proteins_of_interest)
]
intensity_name_a = default_intensity_column(filtered_df)
intensity_name_b = default_intensity_column(filtered_result_df)

if group_by in {"Sample", "Protein ID"}:
fig = make_subplots(rows=1, cols=2)
trace0 = go.Box(
y=dataframe_a[intensity_name_a],
x=dataframe_a[group_by],
y=filtered_df[intensity_name_a],
x=filtered_df[group_by],
marker_color=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[0],
name=name_a,
)
trace1 = go.Box(
y=dataframe_b[intensity_name_b],
x=dataframe_b[group_by],
y=filtered_result_df[intensity_name_b],
x=filtered_result_df[group_by],
marker_color=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[1],
name=name_b,
)
Expand All @@ -164,12 +176,12 @@ def create_box_plots(
elif group_by == "None":
fig = make_subplots(rows=1, cols=2)
trace0 = go.Box(
y=dataframe_a[intensity_name_a],
y=filtered_df[intensity_name_a],
marker_color=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[0],
name=name_a,
)
trace1 = go.Box(
y=dataframe_b[intensity_name_b],
y=filtered_result_df[intensity_name_b],
marker_color=PROTZILLA_DISCRETE_COLOR_OUTLIER_SEQUENCE[1],
name=name_b,
)
Expand Down Expand Up @@ -208,6 +220,7 @@ def create_histograms(
heading: str = "",
y_title: str = "",
x_title: str = "",
proteins_of_interest=None,
visual_transformation: str = "linear",
overlay: bool = False,
) -> Figure:
Expand Down
25 changes: 21 additions & 4 deletions protzilla/data_preprocessing/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,28 @@ def by_log(protein_df: pd.DataFrame, peptide_df: pd.DataFrame | None, log_base="
return dict(protein_df=transformed_df, peptide_df=transformed_peptide_df)


def by_log_plot(method_inputs, method_outputs, graph_type, group_by):
def by_log_plot(
method_inputs,
method_outputs,
graph_type,
group_by,
proteins_of_interest=None,
):
return _build_box_hist_plot(
method_inputs["protein_df"], method_outputs["protein_df"], graph_type, group_by
method_inputs["protein_df"],
method_outputs["protein_df"],
graph_type,
group_by,
[] if proteins_of_interest is None else proteins_of_interest,
)


def _build_box_hist_plot(df, result_df, graph_type, group_by):
def _build_box_hist_plot(
df,
result_df,
graph_type,
group_by,
proteins_of_interest=None,
):
if graph_type == "Boxplot":
fig = create_box_plots(
dataframe_a=df,
Expand All @@ -59,6 +74,7 @@ def _build_box_hist_plot(df, result_df, graph_type, group_by):
name_b="After Transformation",
heading="Distribution of Protein Intensities",
group_by=group_by,
proteins_of_interest=proteins_of_interest,
y_title="Intensity",
)
if graph_type == "Histogram":
Expand All @@ -68,6 +84,7 @@ def _build_box_hist_plot(df, result_df, graph_type, group_by):
name_a="Before Transformation",
name_b="After Transformation",
heading="Distribution of Protein Intensities",
proteins_of_interest=proteins_of_interest,
x_title="Protein Intensities",
y_title="Frequency of Protein Intensities",
)
Expand Down
2 changes: 1 addition & 1 deletion protzilla/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def _clear_future_steps(self, index: int | None = None) -> None:
index = self.current_step_index
if index == len(self.all_steps) - 1:
return
for step in self.all_steps[index + 1 :]:
for step in self.all_steps[index:]:
step.output = Output()
step.messages = Messages()
step.plots = Plots()
6 changes: 6 additions & 0 deletions tests/protzilla/data_preprocessing/test_imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def test_imputation_min_value_per_df(
"Bar chart",
"Sample",
"linear",
[],
)
if show_figures:
fig1.show()
Expand Down Expand Up @@ -200,6 +201,7 @@ def test_imputation_min_value_per_sample(
"Bar chart",
"Sample",
"linear",
[],
)
if show_figures:
fig1.show()
Expand Down Expand Up @@ -235,6 +237,7 @@ def test_imputation_min_value_per_protein(
"Bar chart",
"Sample",
"linear",
[],
)
if show_figures:
fig1.show()
Expand Down Expand Up @@ -270,6 +273,7 @@ def test_imputation_mean_per_protein(
"Bar chart",
"Sample",
"linear",
[],
)
if show_figures:
fig1.show()
Expand Down Expand Up @@ -303,6 +307,7 @@ def test_imputation_knn(show_figures, input_imputation_df, assertion_df_knn):
"Bar chart",
"Sample",
"linear",
[],
)
if show_figures:
fig1.show()
Expand Down Expand Up @@ -345,6 +350,7 @@ def test_imputation_normal_distribution_sampling(show_figures, input_imputation_
"Bar chart",
"Sample",
"linear",
[],
)
if show_figures:
fig1.show()
Expand Down
Loading