diff --git a/protzilla/constants/colors.py b/protzilla/constants/colors.py index 0c6600f7..9a92e93c 100644 --- a/protzilla/constants/colors.py +++ b/protzilla/constants/colors.py @@ -11,4 +11,37 @@ """First color in list. Conventionally used for visualizing outliers.""" PLOT_SECONDARY_COLOR = PLOT_COLOR_SEQUENCE[1] -"""Second color in list.""" \ No newline at end of file +"""Second color in list.""" + + +def interpolate_color(color_a, color_b, t): + """ + Interpolate between two RGB color strings based on a float t between 0 and 1. + + Args: + color_a (str): RGB color string in the format "#RRGGBB". + color_b (str): RGB color string in the format "#RRGGBB". + t (float): A float between 0 and 1 representing the interpolation factor. + + Returns: + str: Interpolated color as an RGB string in the format "#RRGGBB". + """ + if not (0 <= t <= 1): + raise ValueError("Interpolation factor t must be between 0 and 1") + + # Convert hex color strings to RGB tuples + def hex_to_rgb(hex_color): + hex_color = hex_color.lstrip("#") + return tuple(int(hex_color[i : i + 2], 16) for i in (0, 2, 4)) + + # Convert RGB tuples back to hex color strings + def rgb_to_hex(rgb): + return f"#{''.join(f'{c:02x}' for c in rgb)}" + + rgb_a = hex_to_rgb(color_a) + rgb_b = hex_to_rgb(color_b) + + # Interpolate each color channel + interpolated_rgb = tuple(int(a + (b - a) * t) for a, b in zip(rgb_a, rgb_b)) + + return rgb_to_hex(interpolated_rgb) diff --git a/protzilla/data_analysis/protein_coverage.py b/protzilla/data_analysis/protein_coverage.py new file mode 100644 index 00000000..bd77d783 --- /dev/null +++ b/protzilla/data_analysis/protein_coverage.py @@ -0,0 +1,514 @@ +from colour import color_scale +from docutils.nodes import title +from tqdm import tqdm +from protzilla.constants.paths import EXTERNAL_DATA_PATH +from protzilla.data_analysis.differential_expression_mann_whitney import ( + mann_whitney_test_on_intensity_data, +) +from protzilla.disk_operator import PickleOperator +from 
protzilla.constants.colors import ( + PLOT_PRIMARY_COLOR, + PLOT_COLOR_SEQUENCE, + interpolate_color, +) +from dataclasses import dataclass +import pandas as pd +from numpy import log2 +import plotly.graph_objects as go +from plotly.subplots import make_subplots +from protzilla.constants.protzilla_logging import logger + +# import StrEnum +from enum import StrEnum + +INTENSITY_COLORS = ["#FFFFFF", PLOT_COLOR_SEQUENCE[3]] +SEQUENCE_DEPTH_PEPTIDE_SPACING = 1 + + +class IntensityNormalization(StrEnum): + min_max_scaling = "min_max_scaling" + none = "none" + + +class AggregationMethod(StrEnum): + median = "median" + mean = "mean" + + +@dataclass(unsafe_hash=True) +class PeptideMatch: + peptide_sequence: str + start_location_on_protein: int + end_location_on_protein: int + intensity: float = 0.0 + metadata_group: str = "" + + +@dataclass(unsafe_hash=True) +class ProteinHit: + protein_id: str + start_location_on_protein: int + end_location_on_protein: int + + +class IntensityNormalization(StrEnum): + min_max_scaling = "min_max_scaling" + none = "none" + + +def build_kmer_dictionary( + protein_dictionary: dict[str, str], k: int = 5 +) -> dict[str, list[tuple[str, int]]]: + """ + Builds a dictionary of all kmers in a list of protein sequences. The kmers are generated by sliding a window of size + k along the protein sequences. The dictionary maps a the k-mer to a list of tuples containing the protein ID and + the index of + the k-mer in the protein sequence. The dictionary is saved to disk for future use. 
+ """ + # if the dictionary already exists, load it from disk + kmer_dict_path = EXTERNAL_DATA_PATH / f"kmer_dict_{k}.pkl" + if kmer_dict_path.exists(): + kmer_dict = PickleOperator.read(kmer_dict_path) + return kmer_dict + kmer_dict = {} + for protein_id, protein_sequence in tqdm( + protein_dictionary.items(), + desc="Building kmer dictionary", + unit_scale=True, + unit="protein", + ): + for i in range(len(protein_sequence) - k + 1): + kmer = protein_sequence[i : i + k] + if kmer in kmer_dict: + kmer_dict[kmer].append((protein_id, i)) + else: + kmer_dict[kmer] = [(protein_id, i)] + PickleOperator.write(kmer_dict_path, kmer_dict) + return kmer_dict + + +def match_peptide_to_protein_ids( + peptide_sequence: str, + protein_kmer_dictionary: dict[str, list[tuple[str, int]]], + protein_dictionary: dict[str, str], +) -> list[ProteinHit]: + """ + Matches a peptide sequence to a dictionary of kmers in protein sequences. + Returns a list of ProteinHit objects containing the protein ID and the start location of the peptide on the protein. 
+ """ + if len(peptide_sequence) == 0: + raise ValueError("Peptide sequence is empty.") + k = 5 + peptide_kmers = [ + peptide_sequence[i : i + k] for i in range(len(peptide_sequence) - k + 1) + ] + first_kmer, last_kmer = peptide_kmers[0], peptide_kmers[-1] + # match the kmers + first_kmer_matches = protein_kmer_dictionary.get(first_kmer, []) + last_kmer_matches = protein_kmer_dictionary.get(last_kmer, []) + if first_kmer_matches == [] or last_kmer_matches == []: + return [] + + hits = [ + ProteinHit( + protein_id, start_first_kmer, start_first_kmer + len(peptide_sequence) + ) + for protein_id, start_first_kmer in first_kmer_matches + for _, start_last_kmer in last_kmer_matches + if protein_id == _ + and start_last_kmer == start_first_kmer + len(peptide_sequence) - k + and protein_dictionary[protein_id][ + start_first_kmer : start_first_kmer + len(peptide_sequence) + ] + == peptide_sequence + ] + hits = list(set(hits)) # remove duplicates + + # Just sanity checks + for hit in hits: + subsequence = protein_dictionary[hit.protein_id][ + hit.start_location_on_protein : hit.end_location_on_protein + ] + assert len(subsequence) == len( + peptide_sequence + ), f"Lengths do not match: {len(subsequence)} != {len(peptide_sequence)}" + assert subsequence in peptide_sequence, ( + f"Subsequence not in peptide sequence:\nA: {subsequence}\nB: " + f"{peptide_sequence}" + ) + assert peptide_sequence in protein_dictionary[hit.protein_id], ( + f"Peptide not in protein sequence: {peptide_sequence} not in " + f"{protein_dictionary[protein_id]}" + ) + return hits + + +def plot_protein_coverage( + fasta_df: pd.DataFrame, + peptide_df: pd.DataFrame, + metadata_df: pd.DataFrame, + protein_id: str, + grouping: str, + selected_groups: list[str] = None, + aggregation_method: AggregationMethod = AggregationMethod.median, +) -> None: + """ + Plots the coverage of a protein sequence by peptides. 
+ """ + # generate the protein kmer dictionary + if selected_groups is None or selected_groups == []: + raise ValueError("No samples provided.") + + # add group information to the peptide dataframe + peptide_df = peptide_df.merge(metadata_df, on="Sample") + + reduced_peptide_df = peptide_df[ + peptide_df[grouping].isin(selected_groups) & peptide_df["Intensity"] > 0 + ] + if len(reduced_peptide_df) == 0: + raise ValueError("No peptides found for the samples provided.") + + protein_sequence_dict = dict( + zip(fasta_df["Protein ID"], fasta_df["Protein Sequence"]) + ) + sequence_kmer_dict = build_kmer_dictionary(protein_sequence_dict, k=5) + + if protein_id not in protein_sequence_dict: + raise ValueError(f"Protein ID {protein_id} not found in protein dictionary.") + + protein_sequence = protein_sequence_dict[protein_id] + peptide_matches = [] + coverage_by_group_name = { + sample: [0] * len(protein_sequence) for sample in selected_groups + } + + for (group_name, peptide_sequence), row in tqdm( + reduced_peptide_df[[grouping, "Sequence", "Intensity"]] + .drop_duplicates() + .groupby([grouping, "Sequence"]), + desc="Matching peptides to protein", + unit_scale=True, + unit="peptide", + ): + sample = extract_peptide_from_slice(row, aggregation_method) + if sample.empty: + continue + intensity = sample["Intensity"].values[0] + protein_hits = match_peptide_to_protein_ids( + peptide_sequence=peptide_sequence, + protein_kmer_dictionary=sequence_kmer_dict, + protein_dictionary=protein_sequence_dict, + ) + # we only care about the hits pertaining to the argument-supplied protein id + filtered_protein_hits = filter( + lambda x: x.protein_id == protein_id, protein_hits + ) + for protein_hit in filtered_protein_hits: + # add the peptide sequence and location to the peptide matches pertaining to the protein id + peptide_matches.append( + PeptideMatch( + peptide_sequence=peptide_sequence, + start_location_on_protein=protein_hit.start_location_on_protein, + 
end_location_on_protein=protein_hit.end_location_on_protein, + intensity=log2(intensity), + metadata_group=group_name, + ) + ) + # update the coverage of the protein sequence + increment_coverage(coverage_by_group_name[group_name], protein_hit) + + if len(peptide_matches) == 0: + raise ValueError(f"No peptides matched for protein {protein_id}") + + # now that all the matches have been determined with their start and end on the protein sequence, we need to find + # an optimal solution for the location of their rectangles in the plot without overlap while minimizing the + # required number of rows (vertical space) + rows_by_group_name = distribute_to_rows(peptide_matches) + + fig = build_coverage_plot( + coverages_by_group_name=coverage_by_group_name, + peptide_matches=peptide_matches, + protein_id=protein_id, + protein_sequence=protein_sequence, + rows_by_group_name=rows_by_group_name, + grouping=grouping, + aggregation_method=aggregation_method, + ) + + return dict(plots=[fig]) + + +def build_coverage_plot( + coverages_by_group_name: list[int], + peptide_matches: list[PeptideMatch], + protein_id: str, + protein_sequence: str, + rows_by_group_name: dict[str, list[list[PeptideMatch]]], + intensity_normalization: IntensityNormalization = IntensityNormalization.min_max_scaling, + grouping: str = "", + aggregation_method: AggregationMethod = AggregationMethod.median, +) -> go.Figure: + number_of_subplots = len(rows_by_group_name.keys()) + protein_sequence_labels = [ + f"{amino_acid} - {amino_acid_index} " + for amino_acid_index, amino_acid in enumerate(protein_sequence) + ] + subplot_titles = [ + f"Peptides of {group_name}" for group_name in rows_by_group_name.keys() + ] + max_coverage_value = get_max_coverage(coverages_by_group_name) + + fig = make_subplots( + rows=number_of_subplots, + cols=1, + shared_xaxes=True, + vertical_spacing=0.1, + subplot_titles=subplot_titles, + specs=[[{"secondary_y": True}] for _ in range(number_of_subplots)], + ) + # Peptides + for 
group_index, group_name in enumerate(rows_by_group_name.keys(), start=1): + peptide_rows = rows_by_group_name[group_name] + coverage = coverages_by_group_name[group_name] + + # Calculate the desired heights + max_bar_height = max_coverage_value + desired_peptide_height = max_bar_height * 2 # 1:2 ratio + + # Calculate the height scaling factor for peptides + num_peptide_rows = len(peptide_rows) + if num_peptide_rows > 0: + # Include spacing in the calculation + total_spacing = SEQUENCE_DEPTH_PEPTIDE_SPACING + peptide_box_height = ( + desired_peptide_height - total_spacing + ) / num_peptide_rows + else: + peptide_box_height = 1.0 + + add_sequence_depth_to_plot( + fig=fig, + coverage=coverage, + group_subplot_index=group_index, + protein_sequence_labels=protein_sequence_labels, + ) + + # Peptide plot + current_group_intensities = [ + peptide_match.intensity for row in peptide_rows for peptide_match in row + ] + max_intensity, min_intensity = max(current_group_intensities), min( + current_group_intensities + ) + scale_intensity = lambda intensity: ( + (intensity - min_intensity) / (max_intensity - min_intensity) + if max_intensity != min_intensity + else 0 + ) + for row_index, row in enumerate(peptide_rows): + for peptide_match in row: + add_peptide_to_plot( + fig=fig, + normalized_intensity=scale_intensity(peptide_match.intensity), + peptide_match=peptide_match, + group_subplot_index=group_index, + protein_sequence_labels=protein_sequence_labels, + row_index=row_index, + offset=max_coverage_value + SEQUENCE_DEPTH_PEPTIDE_SPACING, + box_height=peptide_box_height, + color_a=INTENSITY_COLORS[0], + color_b=INTENSITY_COLORS[1], + grouping=grouping, + ) + + fig.update_layout(hovermode="closest") + # Update y-axis range to accommodate both the bar plot and peptide sections + total_height = ( + max_coverage_value + SEQUENCE_DEPTH_PEPTIDE_SPACING + desired_peptide_height + ) + fig.update_yaxes( + range=[0, total_height], + showticklabels=False, + showgrid=False, + 
row=group_index, + col=1, + ) + fig.update_xaxes(showticklabels=False, showgrid=False, row=group_index, col=1) + fig.update_xaxes(showticklabels=True, row=number_of_subplots, col=1) + + add_intensity_legend(fig, aggregation_method) + + return fig + + +def add_intensity_legend( + fig, aggregation_method: AggregationMethod = AggregationMethod.median +): + color_scale = [[0, INTENSITY_COLORS[0]], [1, INTENSITY_COLORS[1]]] + color_legend_trace = go.Scatter( + x=[None], + y=[None], + mode="markers", + marker=dict( + colorscale=color_scale, + showscale=True, + cmin=0, + cmax=1, + colorbar=dict( + title=f"Intensity of peptide<br>(aggregated via {aggregation_method})", + x=1.0, # Move further outside the plot + len=0.9, # Increase length of colorbar (70% of subplot height) + thickness=30, # Increase thickness of colorbar + titleside="right", # Move title to the right side of the bar + ), + size=10, + ), + showlegend=False, + ) + fig.add_trace(color_legend_trace, row=1, col=1) + + +def add_sequence_depth_to_plot( + fig: go.Figure, + coverage: list[int], + group_subplot_index: int, + protein_sequence_labels: list[str], +) -> None: + SEQUENCE_DEPTH_BAR_CHART = dict( + x=protein_sequence_labels, + y=coverage, + showlegend=False, + marker=dict(color=PLOT_PRIMARY_COLOR), + # add hover text + hoverinfo="y", + hovertemplate="Sequencing depth at %{x}: %{y}", + ) + fig.add_trace(go.Bar(**SEQUENCE_DEPTH_BAR_CHART), row=group_subplot_index, col=1) + + +def add_peptide_to_plot( + fig: go.Figure, + normalized_intensity: float, + peptide_match: PeptideMatch, + group_subplot_index: int, + protein_sequence_labels: list[str], + row_index: int, + offset: int, + box_height: float, + color_a: str = "#FFFFFF", + color_b: str = PLOT_COLOR_SEQUENCE[3], + grouping: str = "", +) -> None: + x0, x1 = ( + peptide_match.start_location_on_protein, + peptide_match.end_location_on_protein, + ) + y0, y1 = row_index * box_height + offset, (row_index + 1) * box_height + offset + # interpolate between the two colors + color = interpolate_color(color_a, color_b, normalized_intensity) + PEPTIDE_SHAPE = dict( + type="rect", + x0=x0 - 0.5, + y0=y0, + x1=x1 - 0.5, + y1=y1, + fillcolor=color, + line_color=color, + layer="above", + ) + fig.add_shape(**PEPTIDE_SHAPE, row=group_subplot_index, col=1) + # add invisible plotly object to the rectangle to show the peptide sequence when hovered over + fig.add_trace( + go.Scatter( + x=protein_sequence_labels[ + peptide_match.start_location_on_protein : peptide_match.end_location_on_protein + + 1 + ], + y=[(y0 + y1) / 2] * len(peptide_match.peptide_sequence), + text=[ + f"{grouping}: 
{peptide_match.metadata_group}<br>Peptide: {peptide_match.peptide_sequence} " + f"({peptide_match.start_location_on_protein}-{peptide_match.end_location_on_protein})<br>" + f"Intensity: {peptide_match.intensity}"
+ ) + peptide_matches.sort( + key=lambda peptide_match: peptide_match.start_location_on_protein + ) + rows = { + group_name: [] + for group_name in set( + [peptide_match.metadata_group for peptide_match in peptide_matches] + ) + } + for peptide_match in peptide_matches: + # find the first row that does not overlap with the current peptide + for row in rows[peptide_match.metadata_group]: + if ( + row[-1].end_location_on_protein + < peptide_match.start_location_on_protein + ): + row.append((peptide_match)) + break + else: + rows[peptide_match.metadata_group].append([(peptide_match)]) + return rows + + +def get_max_coverage(coverage: dict[list[int]]) -> int: + if len(coverage) == 0: + raise ValueError( + "Cannot calculate maximum coverage value: No coverage data provided." + ) + return max([max(coverage) for coverage in coverage.values()]) diff --git a/protzilla/disk_operator.py b/protzilla/disk_operator.py index ce7f07ad..dfee538c 100644 --- a/protzilla/disk_operator.py +++ b/protzilla/disk_operator.py @@ -6,6 +6,7 @@ import pandas as pd import yaml +import pickle from plotly.io import read_json, write_json import protzilla.utilities as utilities @@ -36,6 +37,25 @@ def __exit__(self, exc_type, exc_val, exc_tb): return False return True +class PickleOperator: + @staticmethod + def read(file_path: Path): + with ErrorHandler(): + with open(file_path, "rb") as file: + logger.info(f"Reading pickle from {file_path}") + return pickle.load(file) + + @staticmethod + def write(file_path: Path, data): + with ErrorHandler(): + if not file_path.exists(): + if not file_path.parent.exists(): + logger.info( + f"Parent directory {file_path.parent} did not exist and was created" + ) + file_path.parent.mkdir(parents=True) + with open(file_path, "wb") as file: + pickle.dump(data, file) class YamlOperator: @staticmethod diff --git a/protzilla/importing/fasta_import.py b/protzilla/importing/fasta_import.py new file mode 100644 index 00000000..bbdb9373 --- /dev/null +++ 
b/protzilla/importing/fasta_import.py @@ -0,0 +1,36 @@ +""" +This module contains the code to parse a fasta file containing protein sequences and their ids. +""" +import logging + +import pandas as pd +from Bio import SeqIO + + +def parse_fasta_id(fasta_id: str) -> str: + """ + Parse the fasta id to get the protein name from the fasta id string + """ + metadata = fasta_id.split("|")[1] + if len(metadata) < 2: + logging.warning(f"Metadata too short: {metadata}") + return "" + return metadata + + +def fasta_import(file_path: str) -> pd.DataFrame: + """ + Import a fasta file and return a DataFrame with the protein sequences and their protein ids + """ + fasta_iterator = SeqIO.parse(open(file_path), "fasta") + protein_ids = [] + protein_sequences = [] + for fasta_sequence in fasta_iterator: + id, sequence = parse_fasta_id(fasta_sequence.id), str(fasta_sequence.seq) + protein_ids.append(id) + protein_sequences.append(sequence) + + fasta_sequences = pd.DataFrame( + {"Protein ID": protein_ids, "Protein Sequence": protein_sequences} + ) + return {"fasta_df": fasta_sequences} diff --git a/protzilla/methods/data_analysis.py b/protzilla/methods/data_analysis.py index 511629f5..05a0d0fc 100644 --- a/protzilla/methods/data_analysis.py +++ b/protzilla/methods/data_analysis.py @@ -7,15 +7,17 @@ k_means, ) from protzilla.data_analysis.differential_expression_anova import anova -from protzilla.data_analysis.differential_expression_kruskal_wallis import kruskal_wallis_test_on_ptm_data, \ - kruskal_wallis_test_on_intensity_data +from protzilla.data_analysis.differential_expression_kruskal_wallis import ( + kruskal_wallis_test_on_intensity_data, + kruskal_wallis_test_on_ptm_data, +) from protzilla.data_analysis.differential_expression_linear_model import linear_model from protzilla.data_analysis.differential_expression_mann_whitney import ( - mann_whitney_test_on_intensity_data, mann_whitney_test_on_ptm_data) + mann_whitney_test_on_intensity_data, + mann_whitney_test_on_ptm_data, +) 
from protzilla.data_analysis.differential_expression_t_test import t_test from protzilla.data_analysis.dimension_reduction import t_sne, umap -from protzilla.data_analysis.ptm_analysis import ptms_per_sample, \ - ptms_per_protein_and_sample, select_peptides_of_protein from protzilla.data_analysis.model_evaluation import evaluate_classification_model from protzilla.data_analysis.plots import ( clustergram_plot, @@ -23,11 +25,12 @@ prot_quant_plot, scatter_plot, ) +from protzilla.data_analysis.protein_coverage import plot_protein_coverage from protzilla.data_analysis.protein_graphs import peptides_to_isoform, variation_graph from protzilla.data_analysis.ptm_analysis import ( - select_peptides_of_protein, ptms_per_protein_and_sample, ptms_per_sample, + select_peptides_of_protein, ) from protzilla.data_analysis.ptm_quantification import flexiquant_lf from protzilla.methods.data_preprocessing import TransformationLog @@ -164,8 +167,10 @@ def plot(self, inputs): class DifferentialExpressionMannWhitneyOnIntensity(DataAnalysisStep): display_name = "Mann-Whitney Test" operation = "differential_expression" - method_description = ("A function to conduct a Mann-Whitney U test between groups defined in the clinical data." - "The p-values are corrected for multiple testing.") + method_description = ( + "A function to conduct a Mann-Whitney U test between groups defined in the clinical data." + "The p-values are corrected for multiple testing." 
+ ) input_keys = [ "protein_df", @@ -191,7 +196,9 @@ def method(self, inputs: dict) -> dict: def insert_dataframes(self, steps: StepManager, inputs) -> dict: if steps.get_step_output(Step, "protein_df", inputs["protein_df"]) is not None: - inputs["protein_df"] = steps.get_step_output(Step, "protein_df", inputs["protein_df"]) + inputs["protein_df"] = steps.get_step_output( + Step, "protein_df", inputs["protein_df"] + ) inputs["metadata_df"] = steps.metadata_df inputs["log_base"] = steps.get_step_input(TransformationLog, "log_base") return inputs @@ -200,8 +207,10 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: class DifferentialExpressionMannWhitneyOnPTM(DataAnalysisStep): display_name = "Mann-Whitney Test" operation = "Peptide analysis" - method_description = ("A function to conduct a Mann-Whitney U test between groups defined in the clinical data." - "The p-values are corrected for multiple testing.") + method_description = ( + "A function to conduct a Mann-Whitney U test between groups defined in the clinical data." + "The p-values are corrected for multiple testing." + ) input_keys = [ "ptm_df", @@ -234,8 +243,10 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: class DifferentialExpressionKruskalWallisOnIntensity(DataAnalysisStep): display_name = "Kruskal-Wallis Test" operation = "differential_expression" - method_description = ("A function to conduct a Kruskal-Wallis test between groups defined in the clinical data." - "The p-values are corrected for multiple testing.") + method_description = ( + "A function to conduct a Kruskal-Wallis test between groups defined in the clinical data." + "The p-values are corrected for multiple testing." 
+ ) input_keys = [ "protein_df", @@ -265,8 +276,10 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: class DifferentialExpressionKruskalWallisOnIntensity(DataAnalysisStep): display_name = "Kruskal-Wallis Test" operation = "differential_expression" - method_description = ("A function to conduct a Kruskal-Wallis test between groups defined in the clinical data." - "The p-values are corrected for multiple testing.") + method_description = ( + "A function to conduct a Kruskal-Wallis test between groups defined in the clinical data." + "The p-values are corrected for multiple testing." + ) input_keys = [ "protein_df", @@ -288,7 +301,9 @@ def method(self, inputs: dict) -> dict: return kruskal_wallis_test_on_intensity_data(**inputs) def insert_dataframes(self, steps: StepManager, inputs) -> dict: - inputs["protein_df"] = steps.get_step_output(Step, "protein_df", inputs["protein_df"]) + inputs["protein_df"] = steps.get_step_output( + Step, "protein_df", inputs["protein_df"] + ) inputs["metadata_df"] = steps.metadata_df inputs["log_base"] = steps.get_step_input(TransformationLog, "log_base") return inputs @@ -297,8 +312,10 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: class DifferentialExpressionKruskalWallisOnPTM(DataAnalysisStep): display_name = "Kruskal-Wallis Test" operation = "Peptide analysis" - method_description = ("A function to conduct a Kruskal-Wallis test between groups defined in the clinical data." - "The p-values are corrected for multiple testing.") + method_description = ( + "A function to conduct a Kruskal-Wallis test between groups defined in the clinical data." + "The p-values are corrected for multiple testing." + ) input_keys = [ "ptm_df", @@ -327,9 +344,11 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: class PlotVolcano(PlotStep): display_name = "Volcano Plot" operation = "plot" - method_description = ("Plots the results of a differential expression analysis in a volcano plot. 
The x-axis shows " - "the log2 fold change and the y-axis shows the -log10 of the corrected p-values. The user " - "can define a fold change threshold and an alpha level to highlight significant items.") + method_description = ( + "Plots the results of a differential expression analysis in a volcano plot. The x-axis shows " + "the log2 fold change and the y-axis shows the -log10 of the corrected p-values. The user " + "can define a fold change threshold and an alpha level to highlight significant items." + ) input_keys = [ "p_values", "fc_threshold", @@ -762,6 +781,38 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: return inputs +class PlotProteinCoverage(PlotStep): + display_name = "Protein Coverage Plot" + operation = "plot" + method_description = ( + "Create a protein coverage plot from a protein graph and peptide data" + ) + + input_keys = [ + "protein_id", + "fasta_df", + "metadata_df", + "peptide_df", + "grouping", + "selected_groups", + "aggregation_method", + ] + output_keys = [] + + def method(self, inputs: dict) -> dict: + return plot_protein_coverage(**inputs) + + def insert_dataframes(self, steps: StepManager, inputs) -> dict: + inputs["fasta_df"] = steps.get_step_output( + Step, "fasta_df", inputs["fasta_df_instance"] + ) + inputs["peptide_df"] = steps.get_step_output( + Step, "peptide_df", inputs["peptide_df_instance"] + ) + inputs["metadata_df"] = steps.metadata_df + return + + class ProteinGraphVariationGraph(DataAnalysisStep): display_name = "Protein Variation Graph" operation = "protein_graph" @@ -841,17 +892,23 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: inputs["metadata_df"] = steps.metadata_df if inputs["auto_select"]: - significant_proteins = ( - steps.get_step_output(DataAnalysisStep, "significant_proteins_df", inputs["protein_list"])) - index_of_most_significant_protein = significant_proteins['corrected_p_value'].idxmin() - most_significant_protein = 
significant_proteins.loc[index_of_most_significant_protein] + significant_proteins = steps.get_step_output( + DataAnalysisStep, "significant_proteins_df", inputs["protein_list"] + ) + index_of_most_significant_protein = significant_proteins[ + "corrected_p_value" + ].idxmin() + most_significant_protein = significant_proteins.loc[ + index_of_most_significant_protein + ] inputs["protein_id"] = [most_significant_protein["Protein ID"]] - self.messages.append({ - "level": logging.INFO, - "msg": - f"Selected the most significant Protein: {most_significant_protein['Protein ID']}, " - f"from {inputs['protein_list']}" - }) + self.messages.append( + { + "level": logging.INFO, + "msg": f"Selected the most significant Protein: {most_significant_protein['Protein ID']}, " + f"from {inputs['protein_list']}", + } + ) return inputs @@ -859,8 +916,10 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: class PTMsPerSample(DataAnalysisStep): display_name = "PTMs per Sample" operation = "Peptide analysis" - method_description = ("Analyze the post-translational modifications (PTMs) of a single protein of interest. " - "This function requires a peptide dataframe with PTM information.") + method_description = ( + "Analyze the post-translational modifications (PTMs) of a single protein of interest. " + "This function requires a peptide dataframe with PTM information." + ) input_keys = [ "peptide_df", @@ -882,8 +941,10 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: class PTMsProteinAndPerSample(DataAnalysisStep): display_name = "PTMs per Sample and Protein" operation = "Peptide analysis" - method_description = ("Analyze the post-translational modifications (PTMs) of all Proteins. " - "This function requires a peptide dataframe with PTM information.") + method_description = ( + "Analyze the post-translational modifications (PTMs) of all Proteins. " + "This function requires a peptide dataframe with PTM information." 
+ ) input_keys = [ "peptide_df", @@ -899,4 +960,4 @@ def insert_dataframes(self, steps: StepManager, inputs) -> dict: inputs["peptide_df"] = steps.get_step_output( Step, "peptide_df", inputs["peptide_df"] ) - return inputs \ No newline at end of file + return inputs diff --git a/protzilla/methods/importing.py b/protzilla/methods/importing.py index 6a2f6835..effcd5f1 100644 --- a/protzilla/methods/importing.py +++ b/protzilla/methods/importing.py @@ -1,5 +1,6 @@ from __future__ import annotations +from protzilla.importing.fasta_import import fasta_import from protzilla.importing.metadata_import import ( metadata_column_assignment, metadata_import_method, @@ -10,7 +11,7 @@ max_quant_import, ms_fragger_import, ) -from protzilla.importing.peptide_import import peptide_import, evidence_import +from protzilla.importing.peptide_import import evidence_import, peptide_import from protzilla.steps import Step, StepManager @@ -51,7 +52,9 @@ def method(self, inputs): class MsFraggerImport(ImportingStep): display_name = "MS Fragger Combined Protein Import" operation = "Protein Data Import" - method_description = "Import the combined_protein.tsv file form output of MS Fragger" + method_description = ( + "Import the combined_protein.tsv file form output of MS Fragger" + ) input_keys = ["file_path", "intensity_name", "map_to_uniprot", "aggregation_method"] output_keys = ["protein_df"] @@ -139,4 +142,16 @@ class EvidenceImport(ImportingStep): output_keys = ["peptide_df"] def method(self, inputs): - return evidence_import(**inputs) \ No newline at end of file + return evidence_import(**inputs) + + +class FastaImport(ImportingStep): + display_name = "Fasta Protein Sequence Import" + operation = "fasta_import" + method_description = "Import a fasta file containing protein sequences." 
+ + input_keys = ["file_path"] + output_keys = ["fasta_df"] + + def method(self, inputs): + return fasta_import(**inputs) diff --git a/protzilla/steps.py b/protzilla/steps.py index eecf0d04..eac35b65 100644 --- a/protzilla/steps.py +++ b/protzilla/steps.py @@ -7,6 +7,7 @@ from enum import Enum from io import BytesIO from pathlib import Path +from protzilla.constants.protzilla_logging import logger import pandas as pd import plotly @@ -455,6 +456,9 @@ def check_instance_identifier(step): and input_key in step.inputs ): return step.inputs[input_key] + logging.warning( + f"No input {input_key} found for step type {step_type} and instance identifier {instance_identifier}" + ) return None def all_steps_in_section(self, section: str) -> list[Step]: @@ -506,7 +510,6 @@ def metadata_df(self) -> pd.DataFrame | None: from protzilla.methods.importing import ImportingStep return self.get_step_output(ImportingStep, "metadata_df") - logging.warning("No metadata_df found in steps") @property def preprocessed_output(self) -> Output: diff --git a/tests/protzilla/data_analysis/test_protein_coverage.py b/tests/protzilla/data_analysis/test_protein_coverage.py new file mode 100644 index 00000000..2339d840 --- /dev/null +++ b/tests/protzilla/data_analysis/test_protein_coverage.py @@ -0,0 +1,285 @@ +import pytest +from protzilla.data_analysis.protein_coverage import ( + distribute_to_rows, + get_max_coverage, + PeptideMatch, +) + +import pytest +import pandas as pd +from protzilla.data_analysis.protein_coverage import ( + extract_peptide_from_slice, + AggregationMethod, +) + +import pytest +from protzilla.data_analysis.protein_coverage import increment_coverage, ProteinHit + +import pytest +from protzilla.data_analysis.protein_coverage import ( + match_peptide_to_protein_ids, + ProteinHit, +) + + +def test_match_peptide_to_protein_ids_empty_peptide(): + with pytest.raises(ValueError, match="Peptide sequence is empty."): + match_peptide_to_protein_ids("", {}, {}) + + +def 
test_match_peptide_to_protein_ids_no_matches(): + peptide_sequence = "ABCDE" + protein_kmer_dictionary = {"ABCDE": [("protein1", 0)]} + protein_dictionary = {"protein1": "XYZ"} + result = match_peptide_to_protein_ids( + peptide_sequence, protein_kmer_dictionary, protein_dictionary + ) + assert result == [] + + +def test_match_peptide_to_protein_ids_single_match(): + peptide_sequence = "ABCDE" + protein_kmer_dictionary = {"ABCDE": [("protein1", 0)]} + protein_dictionary = {"protein1": "ABCDE"} + result = match_peptide_to_protein_ids( + peptide_sequence, protein_kmer_dictionary, protein_dictionary + ) + expected = [ + ProteinHit( + protein_id="protein1", + start_location_on_protein=0, + end_location_on_protein=5, + ) + ] + assert result == expected + + +def test_match_peptide_to_protein_ids_multiple_matches(): + peptide_sequence = "ABCDE" + protein_kmer_dictionary = {"ABCDE": [("protein1", 0), ("protein2", 0)]} + protein_dictionary = {"protein1": "ABCDE", "protein2": "ABCDE"} + result = match_peptide_to_protein_ids( + peptide_sequence, protein_kmer_dictionary, protein_dictionary + ) + expected = list( + set( + [ + ProteinHit( + protein_id="protein1", + start_location_on_protein=0, + end_location_on_protein=5, + ), + ProteinHit( + protein_id="protein2", + start_location_on_protein=0, + end_location_on_protein=5, + ), + ] + ) + ) + assert result == expected + + +def test_match_peptide_to_protein_ids_partial_match(): + peptide_sequence = "ABCDE" + protein_kmer_dictionary = {"ABCDE": [("protein1", 0)]} + protein_dictionary = {"protein1": "ABCDEXYZ"} + result = match_peptide_to_protein_ids( + peptide_sequence, protein_kmer_dictionary, protein_dictionary + ) + expected = [ + ProteinHit( + protein_id="protein1", + start_location_on_protein=0, + end_location_on_protein=5, + ) + ] + assert result == expected + + +def test_increment_coverage(): + coverage = [0, 0, 0, 0, 0] + protein_hit = ProteinHit( + protein_id="P12345", start_location_on_protein=1, end_location_on_protein=4 
+ ) + increment_coverage(coverage, protein_hit) + assert coverage == [0, 1, 1, 1, 0] + + +def test_increment_coverage_multiple_hits(): + coverage = [0, 0, 0, 0, 0] + protein_hit1 = ProteinHit( + protein_id="P12345", start_location_on_protein=1, end_location_on_protein=3 + ) + protein_hit2 = ProteinHit( + protein_id="P12345", start_location_on_protein=2, end_location_on_protein=4 + ) + increment_coverage(coverage, protein_hit1) + increment_coverage(coverage, protein_hit2) + assert coverage == [0, 1, 2, 1, 0] + + +def test_increment_coverage_no_overlap(): + coverage = [0, 0, 0, 0, 0] + protein_hit1 = ProteinHit( + protein_id="P12345", start_location_on_protein=0, end_location_on_protein=2 + ) + protein_hit2 = ProteinHit( + protein_id="P12345", start_location_on_protein=3, end_location_on_protein=5 + ) + increment_coverage(coverage, protein_hit1) + increment_coverage(coverage, protein_hit2) + assert coverage == [1, 1, 0, 1, 1] + + +def test_extract_peptide_from_slice_single_entry(): + data = {"Sequence": ["PEPTIDE"], "Intensity": [100]} + df = pd.DataFrame(data) + result = extract_peptide_from_slice(df) + assert result.equals(df) + + +def test_extract_peptide_from_slice_multiple_entries(): + data = {"Sequence": ["PEPTIDE", "PEPTIDE", "PEPTIDE"], "Intensity": [100, 130, 200]} + df = pd.DataFrame(data) + result = extract_peptide_from_slice(df) + expected = pd.DataFrame({"Sequence": "PEPTIDE", "Intensity": [130.0]}) + expected.set_index("Sequence", inplace=True) + assert result.equals(expected) + + +def test_extract_peptide_from_slice_median(): + data = {"Sequence": ["PEPTIDE", "PEPTIDE"], "Intensity": [100, 200]} + df = pd.DataFrame(data) + result = extract_peptide_from_slice(df, AggregationMethod.median) + expected = pd.DataFrame({"Sequence": "PEPTIDE", "Intensity": [150.0]}) + expected.set_index("Sequence", inplace=True) + assert result.equals(expected) + + +def test_extract_peptide_from_slice_mean(): + data = {"Sequence": ["PEPTIDE", "PEPTIDE"], "Intensity": [100, 
200]} + df = pd.DataFrame(data) + result = extract_peptide_from_slice(df, AggregationMethod.mean) + expected = pd.DataFrame({"Sequence": "PEPTIDE", "Intensity": [150.0]}) + expected.set_index("Sequence", inplace=True) + assert result.equals(expected) + + +def test_extract_peptide_from_slice_unknown_strategy(): + data = {"Sequence": ["PEPTIDE", "PEPTIDE"], "Intensity": [100, 200]} + df = pd.DataFrame(data) + with pytest.raises(ValueError, match="Unknown strategy: unknown"): + extract_peptide_from_slice(df, "unknown") + + +import pytest +from protzilla.data_analysis.protein_coverage import distribute_to_rows, PeptideMatch + + +def test_distribute_to_rows_empty(): + with pytest.raises( + ValueError, + match="Attempted to distribute empty list of peptide matches to rows in plot.", + ): + distribute_to_rows([]) + + +def test_distribute_to_rows_single_peptide(): + peptide_matches = [ + PeptideMatch( + peptide_sequence="AAA", + start_location_on_protein=0, + end_location_on_protein=3, + metadata_group="group1", + ) + ] + expected = {"group1": [[peptide_matches[0]]]} + assert distribute_to_rows(peptide_matches) == expected + + +def test_distribute_to_rows_non_overlapping(): + peptide_matches = [ + PeptideMatch( + peptide_sequence="AAA", + start_location_on_protein=0, + end_location_on_protein=3, + metadata_group="group1", + ), + PeptideMatch( + peptide_sequence="BBB", + start_location_on_protein=4, + end_location_on_protein=7, + metadata_group="group1", + ), + ] + expected = {"group1": [[peptide_matches[0], peptide_matches[1]]]} + assert distribute_to_rows(peptide_matches) == expected + + +def test_distribute_to_rows_overlapping(): + peptide_matches = [ + PeptideMatch( + peptide_sequence="AAA", + start_location_on_protein=0, + end_location_on_protein=3, + metadata_group="group1", + ), + PeptideMatch( + peptide_sequence="BBB", + start_location_on_protein=2, + end_location_on_protein=5, + metadata_group="group1", + ), + ] + expected = {"group1": [[peptide_matches[0]], 
[peptide_matches[1]]]} + assert distribute_to_rows(peptide_matches) == expected + + +def test_distribute_to_rows_multiple_groups(): + peptide_matches = [ + PeptideMatch( + peptide_sequence="AAA", + start_location_on_protein=0, + end_location_on_protein=3, + metadata_group="group1", + ), + PeptideMatch( + peptide_sequence="BBB", + start_location_on_protein=4, + end_location_on_protein=7, + metadata_group="group2", + ), + ] + expected = {"group1": [[peptide_matches[0]]], "group2": [[peptide_matches[1]]]} + assert distribute_to_rows(peptide_matches) == expected + + +def test_distribute_to_rows(): + peptide_matches = [ + PeptideMatch("PEPTIDE1", 0, 7, 1.0, "group1"), + PeptideMatch("PEPTIDE2", 8, 15, 1.0, "group1"), + PeptideMatch("PEPTIDE3", 16, 23, 1.0, "group1"), + PeptideMatch("PEPTIDE4", 0, 7, 1.0, "group2"), + PeptideMatch("PEPTIDE5", 8, 15, 1.0, "group2"), + ] + rows = distribute_to_rows(peptide_matches) + assert len(rows["group1"]) == 1 + assert len(rows["group2"]) == 1 + + +def test_get_max_coverage(): + coverage = { + "group1": [1, 2, 3, 4, 5], + "group2": [2, 3, 4, 5, 6], + } + max_coverage = get_max_coverage(coverage) + assert max_coverage == 6 + + +def test_get_max_coverage_empty(): + with pytest.raises( + ValueError, + match="Cannot calculate maximum coverage value: No coverage data provided.", + ): + get_max_coverage({}) diff --git a/ui/runs/form_mapping.py b/ui/runs/form_mapping.py index 7221bba6..4d6fae1c 100644 --- a/ui/runs/form_mapping.py +++ b/ui/runs/form_mapping.py @@ -22,6 +22,7 @@ importing.MetadataColumnAssignment: importing_forms.MetadataColumnAssignmentForm, importing.PeptideImport: importing_forms.PeptideImportForm, importing.EvidenceImport: importing_forms.EvidenceImportForm, + importing.FastaImport: importing_forms.FastaImportForm, data_preprocessing.FilterProteinsBySamplesMissing: data_preprocessing_forms.FilterProteinsBySamplesMissingForm, data_preprocessing.FilterByProteinsCount: data_preprocessing_forms.FilterByProteinsCountForm, 
data_preprocessing.FilterSamplesByProteinsMissing: data_preprocessing_forms.FilterSamplesByProteinsMissingForm, @@ -52,6 +53,7 @@ data_analysis.PlotClustergram: data_analysis_forms.PlotClustergramForm, data_analysis.PlotProtQuant: data_analysis_forms.PlotProtQuantForm, data_analysis.PlotPrecisionRecallCurve: data_analysis_forms.PlotPrecisionRecallCurveForm, + data_analysis.PlotProteinCoverage: data_analysis_forms.PlotProteinCoverageForm, data_analysis.PlotROC: data_analysis_forms.PlotROCCurveForm, data_analysis.ClusteringKMeans: data_analysis_forms.ClusteringKMeansForm, data_analysis.ClusteringExpectationMaximisation: data_analysis_forms.ClusteringExpectationMaximizationForm, diff --git a/ui/runs/forms/data_analysis.py b/ui/runs/forms/data_analysis.py index 317fd473..1bee889b 100644 --- a/ui/runs/forms/data_analysis.py +++ b/ui/runs/forms/data_analysis.py @@ -1,16 +1,10 @@ -import logging from enum import Enum, StrEnum -from protzilla.methods.data_preprocessing import DataPreprocessingStep from protzilla.methods.data_analysis import ( DataAnalysisStep, - DifferentialExpressionLinearModel, - DifferentialExpressionTTest, DimensionReductionUMAP, - DataAnalysisStep, PTMsPerSample, SelectPeptidesForProtein, - DifferentialExpressionMannWhitneyOnPTM, ) from protzilla.methods.data_preprocessing import DataPreprocessingStep from protzilla.run import Run @@ -25,7 +19,6 @@ CustomFloatField, CustomMultipleChoiceField, CustomNumberField, - CustomBooleanField, ) @@ -169,7 +162,11 @@ class DifferentialExpressionANOVAForm(MethodForm): initial=MultipleTestingCorrectionMethod.benjamini_hochberg, ) alpha = CustomFloatField( - label="Error rate (alpha)", min_value=0, max_value=1, step_size=0.01, initial=0.05 + label="Error rate (alpha)", + min_value=0, + max_value=1, + step_size=0.01, + initial=0.05, ) grouping = CustomChoiceField(choices=[], label="Grouping from metadata") @@ -178,12 +175,12 @@ class DifferentialExpressionANOVAForm(MethodForm): ) def fill_form(self, run: Run) -> 
None: - self.fields[ - "protein_df" - ].choices = fill_helper.get_choices_for_protein_df_steps(run) - self.fields[ - "grouping" - ].choices = fill_helper.get_choices_for_metadata_non_sample_columns(run) + self.fields["protein_df"].choices = ( + fill_helper.get_choices_for_protein_df_steps(run) + ) + self.fields["grouping"].choices = ( + fill_helper.get_choices_for_metadata_non_sample_columns(run) + ) grouping = self.data.get("grouping", self.fields["grouping"].choices[0][0]) self.fields["selected_groups"].choices = fill_helper.to_choices( run.steps.metadata_df[grouping].unique() @@ -219,12 +216,12 @@ class DifferentialExpressionTTestForm(MethodForm): group2 = CustomChoiceField(choices=[], label="Group 2") def fill_form(self, run: Run) -> None: - self.fields[ - "protein_df" - ].choices = fill_helper.get_choices_for_protein_df_steps(run) - self.fields[ - "grouping" - ].choices = fill_helper.get_choices_for_metadata_non_sample_columns(run) + self.fields["protein_df"].choices = ( + fill_helper.get_choices_for_protein_df_steps(run) + ) + self.fields["grouping"].choices = ( + fill_helper.get_choices_for_metadata_non_sample_columns(run) + ) grouping = self.data.get("grouping", self.fields["grouping"].choices[0][0]) @@ -256,16 +253,20 @@ class DifferentialExpressionLinearModelForm(MethodForm): initial=MultipleTestingCorrectionMethod.benjamini_hochberg, ) alpha = CustomFloatField( - label="Error rate (alpha)", min_value=0, max_value=1, step_size=0.01, initial=0.05 + label="Error rate (alpha)", + min_value=0, + max_value=1, + step_size=0.01, + initial=0.05, ) grouping = CustomChoiceField(choices=[], label="Grouping from metadata") group1 = CustomChoiceField(choices=[], label="Group 1") group2 = CustomChoiceField(choices=[], label="Group 2") def fill_form(self, run: Run) -> None: - self.fields[ - "grouping" - ].choices = fill_helper.get_choices_for_metadata_non_sample_columns(run) + self.fields["grouping"].choices = ( + 
fill_helper.get_choices_for_metadata_non_sample_columns(run) + ) grouping = self.data.get("grouping", self.fields["grouping"].choices[0][0]) @@ -293,25 +294,31 @@ def fill_form(self, run: Run) -> None: class DifferentialExpressionMannWhitneyOnIntensityForm(MethodForm): is_dynamic = True - protein_df = CustomChoiceField( - choices=[], label="Step to use protein data from" - ) + protein_df = CustomChoiceField(choices=[], label="Step to use protein data from") multiple_testing_correction_method = CustomChoiceField( choices=MultipleTestingCorrectionMethod, label="Multiple testing correction", initial=MultipleTestingCorrectionMethod.benjamini_hochberg, ) alpha = CustomFloatField( - label="Error rate (alpha)", min_value=0, max_value=1, step_size=0.01, initial=0.05 + label="Error rate (alpha)", + min_value=0, + max_value=1, + step_size=0.01, + initial=0.05, ) grouping = CustomChoiceField(choices=[], label="Grouping from metadata") group1 = CustomChoiceField(choices=[], label="Group 1") group2 = CustomChoiceField(choices=[], label="Group 2") def fill_form(self, run: Run) -> None: - self.fields["protein_df"].choices = fill_helper.get_choices_for_protein_df_steps(run) + self.fields["protein_df"].choices = ( + fill_helper.get_choices_for_protein_df_steps(run) + ) - self.fields["grouping"].choices = fill_helper.get_choices_for_metadata_non_sample_columns(run) + self.fields["grouping"].choices = ( + fill_helper.get_choices_for_metadata_non_sample_columns(run) + ) grouping = self.data.get("grouping", self.fields["grouping"].choices[0][0]) @@ -346,7 +353,11 @@ class DifferentialExpressionMannWhitneyOnPTMForm(MethodForm): initial=MultipleTestingCorrectionMethod.benjamini_hochberg, ) alpha = CustomFloatField( - label="Error rate (alpha)", min_value=0, max_value=1, step_size=0.01, initial=0.05 + label="Error rate (alpha)", + min_value=0, + max_value=1, + step_size=0.01, + initial=0.05, ) p_value_calculation_method = CustomChoiceField( choices=PValueCalculationMethod, @@ -362,7 
+373,9 @@ def fill_form(self, run: Run) -> None: run.steps.get_instance_identifiers(PTMsPerSample, "ptm_df") ) - self.fields["grouping"].choices = fill_helper.get_choices_for_metadata_non_sample_columns(run) + self.fields["grouping"].choices = ( + fill_helper.get_choices_for_metadata_non_sample_columns(run) + ) grouping = self.data.get("grouping", self.fields["grouping"].choices[0][0]) @@ -390,16 +403,18 @@ def fill_form(self, run: Run) -> None: class DifferentialExpressionKruskalWallisOnIntensityForm(MethodForm): is_dynamic = True - protein_df = CustomChoiceField( - choices=[], label="Step to use protein data from" - ) + protein_df = CustomChoiceField(choices=[], label="Step to use protein data from") multiple_testing_correction_method = CustomChoiceField( choices=MultipleTestingCorrectionMethod, label="Multiple testing correction", initial=MultipleTestingCorrectionMethod.benjamini_hochberg, ) alpha = CustomFloatField( - label="Error rate (alpha)", min_value=0, max_value=1, step_size=0.01, initial=0.05 + label="Error rate (alpha)", + min_value=0, + max_value=1, + step_size=0.01, + initial=0.05, ) grouping = CustomChoiceField(choices=[], label="Grouping from metadata") @@ -408,9 +423,13 @@ class DifferentialExpressionKruskalWallisOnIntensityForm(MethodForm): ) def fill_form(self, run: Run) -> None: - self.fields["protein_df"].choices = fill_helper.get_choices_for_protein_df_steps(run) + self.fields["protein_df"].choices = ( + fill_helper.get_choices_for_protein_df_steps(run) + ) - self.fields["grouping"].choices = fill_helper.get_choices_for_metadata_non_sample_columns(run) + self.fields["grouping"].choices = ( + fill_helper.get_choices_for_metadata_non_sample_columns(run) + ) grouping = self.data.get("grouping", self.fields["grouping"].choices[0][0]) self.fields["selected_groups"].choices = fill_helper.to_choices( run.steps.metadata_df[grouping].unique() @@ -420,16 +439,18 @@ def fill_form(self, run: Run) -> None: class 
DifferentialExpressionKruskalWallisOnPTMForm(MethodForm): is_dynamic = True - ptm_df = CustomChoiceField( - choices=[], label="Step to use ptm data from" - ) + ptm_df = CustomChoiceField(choices=[], label="Step to use ptm data from") multiple_testing_correction_method = CustomChoiceField( choices=MultipleTestingCorrectionMethod, label="Multiple testing correction", initial=MultipleTestingCorrectionMethod.benjamini_hochberg, ) alpha = CustomFloatField( - label="Error rate (alpha)", min_value=0, max_value=1, step_size=0.01, initial=0.05 + label="Error rate (alpha)", + min_value=0, + max_value=1, + step_size=0.01, + initial=0.05, ) grouping = CustomChoiceField(choices=[], label="Grouping from metadata") @@ -441,7 +462,9 @@ def fill_form(self, run: Run) -> None: self.fields["ptm_df"].choices = fill_helper.to_choices( run.steps.get_instance_identifiers(PTMsPerSample, "ptm_df") ) - self.fields["grouping"].choices = fill_helper.get_choices_for_metadata_non_sample_columns(run) + self.fields["grouping"].choices = ( + fill_helper.get_choices_for_metadata_non_sample_columns(run) + ) grouping = self.data.get("grouping", self.fields["grouping"].choices[0][0]) self.fields["selected_groups"].choices = fill_helper.to_choices( run.steps.metadata_df[grouping].unique() @@ -466,7 +489,8 @@ class PlotVolcanoForm(MethodForm): def fill_form(self, run: Run) -> None: self.fields["input_dict"].choices = fill_helper.to_choices( run.steps.get_instance_identifiers( - Step, ["corrected_p_values_df", "log2_fold_change_df"], + Step, + ["corrected_p_values_df", "log2_fold_change_df"], ) ) @@ -486,7 +510,9 @@ def fill_form(self, run: Run) -> None: if step_output is not None: items_of_interest = step_output["PTM"].unique() - self.fields["items_of_interest"].choices = fill_helper.to_choices(items_of_interest) + self.fields["items_of_interest"].choices = fill_helper.to_choices( + items_of_interest + ) class PlotScatterPlotForm(MethodForm): @@ -804,7 +830,7 @@ class 
ClassificationRandomForestForm(MethodForm): # TODO: Workflow_meta line 1763 train_val_split = CustomNumberField( label="Choose the size of the validation data set (you can either enter the absolute number of validation " - "samples or a number between 0.0 and 1.0 to represent the percentage of validation samples)", + "samples or a number between 0.0 and 1.0 to represent the percentage of validation samples)", initial=0.20, ) # TODO: Workflow_meta line 1770 @@ -892,7 +918,7 @@ class ClassificationSVMForm(MethodForm): ) train_val_split = CustomNumberField( label="Choose the size of the validation data set (you can either enter the absolute number of validation " - "samples or a number between 0.0 and 1.0 to represent the percentage of validation samples)", + "samples or a number between 0.0 and 1.0 to represent the percentage of validation samples)", initial=0.20, ) # TODO: Workflow_meta line 1973 @@ -1009,7 +1035,7 @@ class DimensionReductionUMAPForm(MethodForm): ) n_neighbors = CustomNumberField( label="The size of local neighborhood (in terms of number of neighboring sample points) used for manifold " - "approximation", + "approximation", min_value=2, max_value=100, step_size=1, @@ -1050,7 +1076,7 @@ class ProteinGraphPeptidesToIsoformForm(MethodForm): k = CustomNumberField(label="k-mer length", min_value=1, step_size=1, initial=5) allowed_mismatches = CustomNumberField( label="Number of allowed mismatched amino acids per peptide. 
For many allowed mismatches, this can take a " - "long time.", + "long time.", min_value=0, step_size=1, initial=2, @@ -1064,6 +1090,80 @@ class ProteinGraphVariationGraphForm(MethodForm): # TODO: workflow_meta line 2291 - 2295 +class PlotProteinCoverageForm(MethodForm): + from protzilla.data_analysis.protein_coverage import AggregationMethod + + is_dynamic = True + peptide_df_instance = CustomChoiceField( + choices=[], + label="Step to use peptide data from", + ) + fasta_df_instance = CustomChoiceField( + choices=[], + label="Step to use fasta protein data from", + ) + protein_id = CustomChoiceField( + choices=[], + label="Protein ID", + ) + grouping = CustomChoiceField( + choices=[], + label="Grouping from metadata", + ) + selected_groups = CustomMultipleChoiceField( + choices=[], + label="Select groups / samples to plot", + ) + aggregation_method = CustomChoiceField( + choices=fill_helper.to_choices(AggregationMethod), + label="Aggregation method", + initial=AggregationMethod.median, + ) + + def fill_form(self, run: Run) -> None: + self.fields["peptide_df_instance"].choices = fill_helper.get_choices( + run, "peptide_df" + ) + self.fields["fasta_df_instance"].choices = fill_helper.get_choices( + run, "fasta_df", Step + ) + peptide_df_instance_id = self.data.get( + "peptide_df_instance", self.fields["peptide_df_instance"].choices[0][0] + ) + fasta_df_instance_id = self.data.get( + "fasta_df_instance", self.fields["fasta_df_instance"].choices[0][0] + ) + peptide_df = run.steps.get_step_output( + Step, "peptide_df", peptide_df_instance_id + ) + fasta_protein_ids = run.steps.get_step_output( + Step, "fasta_df", fasta_df_instance_id + )["Protein ID"].unique() + peptide_df_protein_ids = peptide_df["Protein ID"].unique() + common_protein_ids = sorted( + list(set(fasta_protein_ids) & set(peptide_df_protein_ids)) + ) + self.fields["protein_id"].choices = fill_helper.to_choices(common_protein_ids) + + # Grouping + self.fields["grouping"].choices = ( + 
fill_helper.get_choices_for_metadata_non_sample_columns(run) + + [("Sample", "Sample")] + ) + + grouping = self.data.get("grouping", self.fields["grouping"].choices[0][0]) + if grouping == "Sample": + self.fields["selected_groups"].choices = fill_helper.to_choices( + peptide_df["Sample"].unique() + ) + else: + self.fields["selected_groups"].choices = fill_helper.to_choices( + run.steps.metadata_df[grouping].unique() + ) + # if grouping is not Sample, show the aggregation method option + self.toggle_visibility("aggregation_method", grouping != "Sample") + + class FLEXIQuantLFForm(MethodForm): is_dynamic = True @@ -1143,13 +1243,17 @@ def fill_form(self, run: Run) -> None: selected_auto_select = self.data.get("auto_select") - choices = fill_helper.to_choices([] if selected_auto_select else ["all proteins"]) - choices.extend(fill_helper.get_choices( - run, "significant_proteins_df", DataAnalysisStep - )) + choices = fill_helper.to_choices( + [] if selected_auto_select else ["all proteins"] + ) + choices.extend( + fill_helper.get_choices(run, "significant_proteins_df", DataAnalysisStep) + ) self.fields["protein_list"].choices = choices - chosen_list = self.data.get("protein_list", self.fields["protein_list"].choices[0][0]) + chosen_list = self.data.get( + "protein_list", self.fields["protein_list"].choices[0][0] + ) if not selected_auto_select: self.toggle_visibility("sort_proteins", True) self.toggle_visibility("protein_ids", True) @@ -1163,7 +1267,9 @@ def fill_form(self, run: Run) -> None: self.fields["protein_ids"].choices = fill_helper.to_choices( run.steps.get_step_output( DataAnalysisStep, "significant_proteins_df", chosen_list - ).sort_values(by="corrected_p_value")["Protein ID"].unique() + ) + .sort_values(by="corrected_p_value")["Protein ID"] + .unique() ) else: self.fields["protein_ids"].choices = fill_helper.to_choices( @@ -1183,9 +1289,7 @@ class PTMsPerSampleForm(MethodForm): ) def fill_form(self, run: Run) -> None: - self.fields["peptide_df"].choices = 
fill_helper.get_choices( - run, "peptide_df" - ) + self.fields["peptide_df"].choices = fill_helper.get_choices(run, "peptide_df") single_protein_peptides = run.steps.get_instance_identifiers( SelectPeptidesForProtein, "peptide_df" @@ -1207,4 +1311,4 @@ def fill_form(self, run: Run) -> None: SelectPeptidesForProtein, "peptide_df" ) if single_protein_peptides: - self.fields["peptide_df"].initial = single_protein_peptides[0] \ No newline at end of file + self.fields["peptide_df"].initial = single_protein_peptides[0] diff --git a/ui/runs/forms/fill_helper.py b/ui/runs/forms/fill_helper.py index eef83be6..d4f86960 100644 --- a/ui/runs/forms/fill_helper.py +++ b/ui/runs/forms/fill_helper.py @@ -1,5 +1,6 @@ from protzilla.run import Run from protzilla.steps import Step +from protzilla.constants.protzilla_logging import logger def to_choices(choices: list[str], required: bool = True) -> list[tuple[str, str]]: @@ -29,6 +30,9 @@ def get_choices( def get_choices_for_metadata_non_sample_columns(run: Run) -> list[tuple[str, str]]: + if run.steps.metadata_df is None: + logger.warning("No metadata_df found in run") + return [] return to_choices( run.steps.metadata_df.columns[ run.steps.metadata_df.columns != "Sample" diff --git a/ui/runs/forms/importing.py b/ui/runs/forms/importing.py index 8975e3ba..600ea517 100644 --- a/ui/runs/forms/importing.py +++ b/ui/runs/forms/importing.py @@ -192,3 +192,7 @@ def fill_form(self, run: Run) -> None: self.fields["map_to_uniprot"].initial = run.steps.get_step_input( [MaxQuantImport, MsFraggerImport, DiannImport], "map_to_uniprot" ) + + +class FastaImportForm(MethodForm): + file_path = CustomFileField(label="Fasta file")