Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/pyprideap/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def _generate_report(
split: bool = False,
sdrf_path: Path | None = None,
no_border: bool = True,
two_sides: bool = True,
) -> Path:
"""Read a data file and generate a QC report."""
import pyprideap as pp
Expand All @@ -114,7 +115,7 @@ def _generate_report(

click.echo("Generating individual plot files...")
logger.debug("Output directory: %s", output_path)
result = qc_report_split(ds, output_path, no_border=no_border)
result = qc_report_split(ds, output_path, no_border=no_border, two_sides=two_sides)
n_files = len(list(result.glob("*.html")))
click.echo(f" {n_files} HTML files saved to {result}/")
return result
Expand Down Expand Up @@ -161,6 +162,11 @@ def main() -> None:
@click.option(
"--no-border/--border", default=True, help="Remove card borders from split plot files (default: no border)."
)
@click.option(
"--two-sides/--no-two-sides",
default=True,
help="Generate combined plot files for side-by-side panel layouts (default: enabled).",
)
@click.option("-v", "--verbose", is_flag=True, default=False, help="Enable verbose logging output.")
def report(
input_file: str | None,
Expand All @@ -170,6 +176,7 @@ def report(
split: bool,
sdrf: str | None,
no_border: bool,
two_sides: bool,
verbose: bool,
) -> None:
"""Generate a QC report from a data file or PAD accession."""
Expand Down Expand Up @@ -203,7 +210,15 @@ def report(
out = Path(f"{output_path}/{stem}")
else:
out = Path(f"{output_path}/{stem}.html")
_generate_report(f, out, platform=platform, split=split, sdrf_path=sdrf_path, no_border=no_border)
_generate_report(
f,
out,
platform=platform,
split=split,
sdrf_path=sdrf_path,
no_border=no_border,
two_sides=two_sides,
)
except Exception as e:
logger.debug("Error processing %s: %s", f.name, e, exc_info=True)
click.echo(f" Skipping {f.name}: {e}", err=True)
Expand All @@ -219,6 +234,7 @@ def report(
split=split,
sdrf_path=sdrf_path,
no_border=no_border,
two_sides=two_sides,
)


Expand Down
2 changes: 1 addition & 1 deletion src/pyprideap/viz/qc/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class DataCompletenessData:
below_lod_rate: list[float] # per-sample fraction below LOD (0-1)
protein_ids: list[str] = field(default_factory=list) # per-protein identifiers
missing_freq: list[float] = field(default_factory=list) # per-protein fraction below LOD (0-1)
title: str = "Data Completeness"
title: str = "Sample Completeness"


@dataclass
Expand Down
90 changes: 89 additions & 1 deletion src/pyprideap/viz/qc/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ def render_data_completeness(data: DataCompletenessData) -> Figure:
fig = make_subplots(
rows=2,
cols=1,
subplot_titles=["Per-Sample Data Completeness", "Missing Frequency Distribution"],
subplot_titles=["Sample Completeness", "Missing Frequency"],
vertical_spacing=0.35,
)

Expand Down Expand Up @@ -670,6 +670,94 @@ def render_data_completeness(data: DataCompletenessData) -> Figure:
return fig


def _unique_tick_labels(labels: list[str]) -> list[str]:
    """Return *labels* with repeated entries suffixed " (2)", " (3)", ... so each is distinct."""
    taken: set[str] = set()
    distinct: list[str] = []
    for label in labels:
        candidate = label
        suffix = 1
        while candidate in taken:
            suffix += 1
            candidate = f"{label} ({suffix})"
        taken.add(candidate)
        distinct.append(candidate)
    return distinct


def render_sample_completeness(data: DataCompletenessData) -> Figure:
    """Per-sample stacked bar showing above/below LOD as standalone plot.

    Args:
        data: Completeness summary. ``sample_ids``, ``above_lod_rate`` and
            ``below_lod_rate`` are read; the two rate lists are scaled by 100
            to match the percent y-axis.

    Returns:
        A Plotly figure with two stacked bar traces ("Above LOD" and
        "Below LOD"), one bar position per sample.
    """
    go, _ = _import_plotly()

    _MAX_LABEL = 20
    # Long IDs are shortened for the tick labels; the full ID is still shown
    # on hover through ``customdata``.
    short_ids = [s if len(s) <= _MAX_LABEL else s[:_MAX_LABEL] + "\u2026" for s in data.sample_ids]
    # The x values are categorical, so two samples whose IDs share the first
    # ``_MAX_LABEL`` characters would otherwise land on the same bar position
    # and be drawn as one sample. Labels that are already unique are untouched.
    x_labels = _unique_tick_labels(short_ids)

    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            x=x_labels,
            y=[r * 100 for r in data.above_lod_rate],
            name="Above LOD",
            marker_color="#2ecc71",
            customdata=data.sample_ids,
            hovertemplate="%{customdata}<br>Above LOD: %{y:.1f}%<extra></extra>",
        ),
    )
    fig.add_trace(
        go.Bar(
            x=x_labels,
            y=[r * 100 for r in data.below_lod_rate],
            name="Below LOD",
            marker_color="#f39c12",
            customdata=data.sample_ids,
            hovertemplate="%{customdata}<br>Below LOD: %{y:.1f}%<extra></extra>",
        ),
    )
    fig.update_xaxes(title_text="", tickangle=-45)
    fig.update_yaxes(title_text="% of Proteins", range=[0, 100], ticksuffix="%")
    fig.update_layout(
        title="Sample Completeness",
        barmode="stack",
        height=500,
        # Horizontal legend placed below the plot area; the bottom margin
        # leaves room for it and the rotated tick labels.
        legend=dict(orientation="h", yanchor="top", y=-0.18),
        margin=dict(b=100),
    )
    return fig


def render_missing_frequency(data: DataCompletenessData) -> Figure:
    """Build the standalone histogram of per-protein missing frequency.

    Args:
        data: Completeness summary; only ``missing_freq`` is read. Its values
            are multiplied by 100 so they line up with the percent x-axis.

    Returns:
        A Plotly figure. When ``missing_freq`` is non-empty it holds the
        histogram, a corner annotation with the share of proteins under the
        30% mark, and a dashed vertical line at 30. Axis titles, the 0-100
        x-range and the layout are applied in every case.
    """
    go, _ = _import_plotly()

    figure = go.Figure()
    freqs = data.missing_freq
    if freqs:
        histogram = go.Histogram(
            x=[value * 100 for value in freqs],
            nbinsx=25,
            marker_color="#e74c3c",
            showlegend=False,
        )
        figure.add_trace(histogram)

        # Share of proteins whose missing fraction is strictly under 0.30.
        under_threshold = len([value for value in freqs if value < 0.30])
        share = under_threshold / len(freqs) * 100
        figure.add_annotation(
            text=f"{share:.1f}% of proteins below 30% missing",
            xref="paper",
            yref="paper",
            x=0.98,
            y=0.98,
            xanchor="right",
            yanchor="top",
            showarrow=False,
            font={"size": 12},
            bgcolor="rgba(255,255,255,0.8)",
        )

        # Dashed marker at the 30% mark referenced by the annotation above.
        figure.add_vline(
            x=30,
            line_dash="dash",
            line_color="#e67e22",
            line_width=2,
            annotation_text="30% threshold",
            annotation_position="bottom right",
            annotation_font_color="#e67e22",
        )

    figure.update_xaxes(
        title_text="Missing Frequency (% Samples Below LOD)",
        range=[0, 100],
    )
    figure.update_yaxes(title_text="Number of Proteins")
    figure.update_layout(
        title="Missing Frequency Distribution",
        height=500,
        margin={"b": 100},
    )
    return figure


def render_cv_distribution(data: CvDistributionData) -> Figure:
go, _ = _import_plotly()
fig = go.Figure(data=[go.Histogram(x=data.cv_values, nbinsx=50)])
Expand Down
Loading
Loading