diff --git a/README.md b/README.md index e724334..967ccce 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ Publication-quality alignment viewer for nucleotide and amino acid sequences. A Supports **BAM files** (with reference FASTA), **pre-aligned FASTA** (e.g. MAFFT output), and **stacking** multiple inputs into a single figure. -![BAM with indels](examples/indel_alignment.png) +![BAM with indels](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/indel_alignment.png) *BAM mode — SNP (yellow), 3bp deletion, 2bp insertion (purple columns), reverse-strand insertion* -![FASTA amino acid alignment](examples/fasta_env_1-120.png) +![FASTA amino acid alignment](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/fasta_env_1-120.png) *FASTA mode — HIV Env protein alignment (HxB2 reference), amino acid palette* -![Stacked BAMs](examples/stacked_bam.png) +![Stacked BAMs](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/stacked_bam.png) *Stacked mode — two BAM files sharing a reference and region* --- @@ -60,6 +60,18 @@ tview \ -o first_120_cols.png ``` +### Classic (black-and-white) mode + +Use `--classic-mode` for textbook-style monochrome output — all black text on a white background with no colored highlighting. Structural conventions (`.` `,` lowercase, `-`) are preserved. + +```bash +tview \ + --fasta aligned.fasta \ + --palette aa \ + --classic-mode \ + -o classic_output.png +``` + --- ## Stacking Multiple Panels @@ -145,6 +157,10 @@ panels = [ bam_panel("sample2.bam", "ref.fa", "chr1:100-200"), ] render_panels(panels, "stacked.png", dpi=300, fontsize=7, cell=0.14) + +# Classic (black-and-white) mode +panel = fasta_panel("aligned.fasta") +render_panels([panel], "classic.png", palette="aa", classic=True) ``` --- @@ -205,6 +221,7 @@ Options: --dpi INTEGER Image resolution. [default: 300] --fontsize INTEGER Base font size in points. [default: 7] --cell FLOAT Cell size in inches. [default: 0.14] + --classic-mode Black-and-white rendering with no color highlighting. -h, --help Show this message and exit. ``` @@ -220,6 +237,7 @@ Options: | `--dpi` | Image resolution | `300` | | `--fontsize` | Base font size in points | `7` | | `--cell` | Cell size in inches (controls spacing) | `0.14` | +| `--classic-mode` | Black-and-white rendering with no color highlighting | `False` | --- @@ -230,6 +248,7 @@ Options: - Use `--fontsize 5` or `6` when displaying wide alignments (>100 columns). - The output format is determined by the file extension: `.png`, `.pdf`, `.svg` all work. - For Nature-style figures, `.pdf` or `.svg` output preserves vector text. +- Use `--classic-mode` for textbook-style monochrome figures that reproduce well in grayscale print. ```bash # Vector output for publication diff --git a/pyproject.toml b/pyproject.toml index df33ba6..e83171e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ classifiers = [ dependencies = [ "matplotlib>=3.5", "click>=8.0", + "pyyaml>=6.0", "pysam>=0.20; sys_platform != 'win32'", ] @@ -47,6 +48,9 @@ Issues = "https://github.com/MurrellGroup/tview/issues" [project.scripts] tview = "tview.cli:main" +[tool.setuptools.package-data] +tview = ["*.yaml"] + [tool.setuptools.packages.find] where = ["src"] diff --git a/src/tview/__init__.py b/src/tview/__init__.py index 4fa5778..38819e4 100644 --- a/src/tview/__init__.py +++ b/src/tview/__init__.py @@ -2,15 +2,11 @@ from importlib.metadata import PackageNotFoundError, version -from tview.tview import ( - AA_COLORS, - NT_COLORS, - Panel, - bam_panel, - fasta_panel, - read_fasta, - render_panels, -) +from tview.bam import bam_panel +from tview.config import AA_COLORS, NT_COLORS +from tview.fasta import fasta_panel, read_fasta +from tview.models import Panel +from tview.renderer import render_panels try: __version__ = version("tview") diff --git a/src/tview/_version.py b/src/tview/_version.py index f123ce6..77c0d72 100644 --- a/src/tview/_version.py +++ b/src/tview/_version.py @@ -27,7 +27,7 @@ commit_id: COMMIT_ID __commit_id__: COMMIT_ID -__version__ = version = "0.1.dev2+g65baca9ef.d20260225" -__version_tuple__ = version_tuple = (0, 1, "dev2", "g65baca9ef.d20260225") +__version__ = version = "0.1.2.dev1" +__version_tuple__ = version_tuple = (0, 1, 2, "dev1") -__commit_id__ = commit_id = "g65baca9ef" +__commit_id__ = commit_id = "g8d3de9eec" diff --git a/src/tview/bam.py b/src/tview/bam.py new file mode 100644 index 0000000..e927ecf --- /dev/null +++ b/src/tview/bam.py @@ -0,0 +1,153 @@ +"""BAM parsing and panel construction.""" + +from __future__ import annotations + +from collections import defaultdict +from pathlib import Path +from typing import Any + +from tview.models import Panel + +# -- CIGAR operations -------------------------------------------------- +CIGAR_MATCH = 0 # M +CIGAR_INS = 1 # I +CIGAR_DEL = 2 # D +CIGAR_REF_SKIP = 3 # N +CIGAR_SOFT_CLIP = 4 # S +CIGAR_SEQ_MATCH = 7 # = +CIGAR_SEQ_MISMATCH = 8 # X + + +def build_read_row( + read: Any, + ref_start: int, + ref_end: int, +) -> tuple[dict[int, str], dict[int, list[str]]]: + """Extract aligned bases and insertions from a single pysam read. + + Walks the CIGAR string to map query bases onto reference positions, + collecting insertions keyed by their anchor reference position. + + Args: + read: A pysam.AlignedSegment with cigartuples and query_sequence. + ref_start: 0-based start of the reference window (inclusive). + ref_end: 0-based end of the reference window (exclusive). + + Returns: + A tuple of (aligned, inserts) where aligned maps ref positions to + bases and inserts maps ref positions to lists of inserted bases. + """ + aligned: dict[int, str] = {} + inserts: dict[int, list[str]] = defaultdict(list) + qpos, rpos = 0, read.reference_start + for op, length in read.cigartuples: + if op in (CIGAR_MATCH, CIGAR_SEQ_MATCH, CIGAR_SEQ_MISMATCH): + for _ in range(length): + if ref_start <= rpos < ref_end: + aligned[rpos] = read.query_sequence[qpos].upper() + qpos += 1 + rpos += 1 + elif op == CIGAR_INS: + anchor = rpos - 1 + if ref_start <= anchor < ref_end: + for j in range(length): + inserts[anchor].append(read.query_sequence[qpos + j].upper()) + qpos += length + elif op == CIGAR_DEL: + for _ in range(length): + if ref_start <= rpos < ref_end: + aligned[rpos] = "-" + rpos += 1 + elif op == CIGAR_REF_SKIP: + rpos += length + elif op == CIGAR_SOFT_CLIP: + qpos += length + return aligned, inserts + + +def bam_panel(bam_path: str | Path, ref_path: str | Path, region: str) -> Panel: + """Build a Panel from a BAM file with reference FASTA and genomic region. + + Reads are sorted by start position and strand. Insertion columns are + expanded so all reads align on a common grid. + + Args: + bam_path: Path to the indexed BAM file. + ref_path: Path to the reference FASTA (must be indexed). + region: Genomic region string in "chrom:start-end" format (0-based start). + + Returns: + A Panel with reference row, read rows, insertion columns, and tick labels. + """ + import pysam + + chrom, rest = region.split(":") + start, end = [int(x) for x in rest.replace(",", "").split("-")] + + with pysam.FastaFile(ref_path) as fasta: + ref_seq = fasta.fetch(chrom, start, end).upper() + + with pysam.AlignmentFile(bam_path, "rb") as samfile: + reads = [ + r + for r in samfile.fetch(chrom, start, end) + if not r.is_unmapped and r.cigartuples + ] + reads.sort(key=lambda r: (r.reference_start, r.is_reverse)) + + # Find max insertion at each ref position + max_ins: dict[int, int] = defaultdict(int) + read_data = [] + for read in reads: + aligned, inserts = build_read_row(read, start, end) + read_data.append((read, aligned, inserts)) + for rpos, bases in inserts.items(): + max_ins[rpos] = max(max_ins[rpos], len(bases)) + + # Build column map + col_map: dict[int, int] = {} + ins_col_set: set[int] = set() + col = 0 + for rpos in range(start, end): + col_map[rpos] = col + col += 1 + n_ins = max_ins.get(rpos, 0) + for j in range(n_ins): + ins_col_set.add(col + j) + col += n_ins + total_cols = col + + # Build ref row with '-' in insertion columns + ref_row: list[str] = [] + for rpos in range(start, end): + ref_row.append(ref_seq[rpos - start]) + for _ in range(max_ins.get(rpos, 0)): + ref_row.append("-") + + # Build sequence rows + seq_rows: list[tuple[str, list[str], bool]] = [] + for read, aligned, inserts in read_data: + row = [" "] * total_cols + for rpos in range(start, end): + c = col_map[rpos] + if rpos in aligned: + row[c] = aligned[rpos] + if rpos in aligned or rpos in inserts: + n_ins = max_ins.get(rpos, 0) + read_ins = inserts.get(rpos, []) + for j in range(n_ins): + if j < len(read_ins): + row[c + 1 + j] = read_ins[j] + else: + row[c + 1 + j] = "-" + seq_rows.append((read.query_name, row, read.is_reverse)) + + # Column labels: 1-based relative, ticks at 1, 10, 20... + ref_width = end - start + tick_1based = [1] + list(range(10, ref_width + 1, 10)) + col_labels = [ + (col_map[start + p - 1], str(p)) for p in tick_1based if (start + p - 1) < end + ] + + label = Path(bam_path).stem + return Panel(label, ref_row, seq_rows, total_cols, col_labels, ins_col_set) diff --git a/src/tview/cli.py b/src/tview/cli.py index 35f4888..a91e59f 100644 --- a/src/tview/cli.py +++ b/src/tview/cli.py @@ -4,7 +4,9 @@ import click -from tview.tview import bam_panel, fasta_panel, render_panels +from tview.bam import bam_panel +from tview.fasta import fasta_panel +from tview.renderer import render_panels def _expand_stdin(paths: list[str]) -> list[str]: @@ -65,7 +67,15 @@ def _expand_stdin(paths: list[str]) -> list[str]: @click.option( "--cell", type=float, default=0.14, show_default=True, help="Cell size in inches." ) -def main(bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell): +@click.option( + "--classic-mode", + is_flag=True, + default=False, + help="Black-and-white rendering with no color highlighting.", +) +def main( + bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell, classic_mode +): """Publication-quality alignment viewer (BAM or FASTA). Supports BAM files (with reference FASTA), pre-aligned FASTA (e.g. MAFFT @@ -95,5 +105,11 @@ def main(bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell) panels.append(fasta_panel(fasta_path, col_start, col_end)) render_panels( - panels, output, fontsize=fontsize, dpi=dpi, palette=palette, cell=cell + panels, + output, + fontsize=fontsize, + dpi=dpi, + palette=palette, + cell=cell, + classic=classic_mode, ) diff --git a/src/tview/config.py b/src/tview/config.py new file mode 100644 index 0000000..61c2fd9 --- /dev/null +++ b/src/tview/config.py @@ -0,0 +1,57 @@ +"""Configuration loader for tview color palettes and style settings. + +Reads palette.yaml and style.yaml from the package directory and exposes +backward-compatible module-level constants. +""" + +from __future__ import annotations + +from functools import lru_cache +from pathlib import Path +from typing import Any + +import yaml + +_PKG_DIR = Path(__file__).parent + + +@lru_cache(maxsize=1) +def load_palette() -> dict[str, Any]: + """Load color palette definitions from palette.yaml. + + Returns: + Parsed YAML dict with ``nucleotide``, ``amino_acid``, and ``rendering`` keys. + """ + with open(_PKG_DIR / "palette.yaml") as fh: + return yaml.safe_load(fh) + + +@lru_cache(maxsize=1) +def load_style() -> dict[str, Any]: + """Load style settings from style.yaml. + + Returns: + Parsed YAML dict with ``alpha`` and ``font`` keys. + """ + with open(_PKG_DIR / "style.yaml") as fh: + return yaml.safe_load(fh) + + +# -- Backward-compatible constants ----------------------------------------- +_palette = load_palette() +_style = load_style() + +NT_COLORS: dict[str, str] = _palette["nucleotide"] +AA_COLORS: dict[str, str] = _palette["amino_acid"] +MISMATCH_BG: str = _palette["rendering"]["mismatch_bg"] +INS_BG: str = _palette["rendering"]["insertion_bg"] +GAP_COLOR: str = _palette["rendering"]["gap_color"] +SEPARATOR_COLOR: str = _palette["rendering"]["separator_color"] +TEXT_COLOR: str = _palette["rendering"]["text_color"] +FALLBACK_BASE_COLOR: str = _palette["rendering"]["fallback_base_color"] +PANEL_LABEL_COLOR: str = _palette["rendering"]["panel_label_color"] + +FWD_ALPHA: float = _style["alpha"]["forward"] +REV_ALPHA: float = _style["alpha"]["reverse"] +FONT_PREFERENCES: list[dict[str, str]] = _style["font"]["preferences"] +FONT_FALLBACK_FILENAME: str = _style["font"]["fallback_filename"] diff --git a/src/tview/fasta.py b/src/tview/fasta.py new file mode 100644 index 0000000..3cad49b --- /dev/null +++ b/src/tview/fasta.py @@ -0,0 +1,112 @@ +"""FASTA parsing and panel construction.""" + +from __future__ import annotations + +from pathlib import Path + +from tview.models import Panel + + +def read_fasta(path: str | Path) -> list[tuple[str, str]]: + """Parse a FASTA file into a list of (name, sequence) tuples. + + Args: + path: Path to the FASTA file. + + Returns: + List of (header_name, concatenated_sequence) tuples. + + Examples: + >>> fasta = tmp_path / "test.fa" + >>> _ = fasta.write_text(">seq1\\nACGT\\n>seq2\\nTGCA\\n") + >>> read_fasta(fasta) + [('seq1', 'ACGT'), ('seq2', 'TGCA')] + >>> read_fasta(tmp_path / "empty.fa") + Traceback (most recent call last): + ... + FileNotFoundError: ... + """ + seqs: list[tuple[str, str]] = [] + name: str | None = None + buf: list[str] = [] + with open(path) as fh: + for line in fh: + if line.startswith(">"): + if name is not None: + seqs.append((name, "".join(buf))) + name = line[1:].strip() + buf = [] + else: + buf.append(line.strip()) + if name is not None: + seqs.append((name, "".join(buf))) + return seqs + + +def fasta_panel( + path: str | Path, + col_start: int | None = None, + col_end: int | None = None, +) -> Panel: + """Build a Panel from an aligned FASTA where the first sequence is the reference. + + Args: + path: Path to the aligned FASTA file. + col_start: 1-based inclusive start column for slicing the alignment. + col_end: 1-based inclusive end column for slicing the alignment. + + Returns: + A Panel with reference row, sequence rows, and column labels. + + Raises: + ValueError: If the FASTA file contains no sequences. + + Examples: + >>> fasta = tmp_path / "aln.fa" + >>> _ = fasta.write_text(">ref\\nACGT\\n>read1\\nACTT\\n>read2\\nA-GT\\n") + >>> p = fasta_panel(fasta) + >>> p.ref_row + ['A', 'C', 'G', 'T'] + >>> p.total_cols + 4 + >>> len(p.seq_rows) + 2 + >>> p.seq_rows[0] + ('read1', ['A', 'C', 'T', 'T'], False) + >>> p2 = fasta_panel(fasta, col_start=2, col_end=3) + >>> p2.ref_row + ['C', 'G'] + """ + seqs = read_fasta(path) + if not seqs: + raise ValueError(f"No sequences in {path}") + + _ref_name, ref_seq = seqs[0] + + # Slice columns if requested (1-based inclusive) + if col_start is not None or col_end is not None: + cs = (col_start or 1) - 1 + ce = col_end or len(ref_seq) + ref_seq = ref_seq[cs:ce] + seqs = [(n, s[cs:ce]) for n, s in seqs] + + aln_len = len(ref_seq) + ref_row = list(ref_seq.upper()) + + seq_rows: list[tuple[str, list[str], bool]] = [] + for name, seq in seqs[1:]: + row = list(seq.upper()[:aln_len]) + row += ["-"] * (aln_len - len(row)) + seq_rows.append((name, row, False)) + + # Column labels: 1-based position in the reference (skip gap columns) + col_labels: list[tuple[int, str]] = [] + ref_pos = 0 + for i, base in enumerate(ref_row): + if base != "-": + ref_pos += 1 + if ref_pos == 1 or ref_pos % 10 == 0: + col_labels.append((i, str(ref_pos))) + + label = Path(path).stem + return Panel(label, ref_row, seq_rows, aln_len, col_labels) diff --git a/src/tview/models.py b/src/tview/models.py new file mode 100644 index 0000000..4ed586d --- /dev/null +++ b/src/tview/models.py @@ -0,0 +1,37 @@ +"""Data structures for tview alignment panels.""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass +class Panel: + """One horizontal alignment block: a reference row + read/sequence rows. + + Attributes: + label: Display name for the panel (e.g. filename stem). + ref_row: Reference sequence as a list of single-character strings. + seq_rows: Read/sequence rows as (name, bases, is_reverse) tuples. + total_cols: Total number of display columns including insertion columns. + col_labels: Tick positions and labels for the x-axis as (column_index, label) pairs. + ins_columns: Column indices that represent insertion positions. + + Examples: + >>> p = Panel("test", ["A", "C"], [("r1", ["A", "T"], False)], 2, [(0, "1")]) + >>> p.label + 'test' + >>> p.ins_columns + set() + """ + + label: str + ref_row: list[str] + seq_rows: list[tuple[str, list[str], bool]] + total_cols: int + col_labels: list[tuple[int, str]] + ins_columns: set[int] | None = None + + def __post_init__(self) -> None: + if self.ins_columns is None: + self.ins_columns = set() diff --git a/src/tview/palette.yaml b/src/tview/palette.yaml new file mode 100644 index 0000000..85839d4 --- /dev/null +++ b/src/tview/palette.yaml @@ -0,0 +1,41 @@ +nucleotide: + A: "#4CAF50" + C: "#2196F3" + G: "#FF9800" + T: "#F44336" + N: "#9E9E9E" + "-": "#9E9E9E" + +amino_acid: + A: "#2196F3" + V: "#2196F3" + L: "#2196F3" + I: "#2196F3" + M: "#2196F3" + F: "#2196F3" + W: "#2196F3" + P: "#2196F3" + K: "#F44336" + R: "#F44336" + H: "#F44336" + D: "#E040FB" + E: "#E040FB" + S: "#4CAF50" + T: "#4CAF50" + N: "#4CAF50" + Q: "#4CAF50" + G: "#FF9800" + C: "#FF9800" + Y: "#FF9800" + "*": "#9E9E9E" + "-": "#9E9E9E" + X: "#9E9E9E" + +rendering: + mismatch_bg: "#FFEB3B55" + insertion_bg: "#CE93D833" + gap_color: "#9E9E9E" + separator_color: "#BDBDBD" + text_color: "#000000" + fallback_base_color: "#9E9E9E" + panel_label_color: "#616161" diff --git a/src/tview/renderer.py b/src/tview/renderer.py new file mode 100644 index 0000000..7e2790a --- /dev/null +++ b/src/tview/renderer.py @@ -0,0 +1,245 @@ +"""Rendering engine for alignment panels.""" + +from __future__ import annotations + +from pathlib import Path + +import matplotlib + +matplotlib.use("Agg") +import matplotlib.font_manager as fm +import matplotlib.pyplot as plt + +from tview.config import ( + AA_COLORS, + FALLBACK_BASE_COLOR, + FONT_FALLBACK_FILENAME, + FONT_PREFERENCES, + FWD_ALPHA, + INS_BG, + MISMATCH_BG, + NT_COLORS, + PANEL_LABEL_COLOR, + REV_ALPHA, + SEPARATOR_COLOR, + TEXT_COLOR, +) +from tview.models import Panel + + +def _resolve_font( + fontsize: float, +) -> tuple[fm.FontProperties, fm.FontProperties]: + """Resolve monospace font for alignment rendering. + + Tries each font in ``FONT_PREFERENCES`` (from style.yaml) in order, then + falls back to ``FONT_FALLBACK_FILENAME``. + + Args: + fontsize: Font size in points. + + Returns: + A tuple of (mono, mono_sm) FontProperties for base text and tick labels. + """ + for pref in FONT_PREFERENCES: + family = pref["family"] + weight = pref.get("weight", "normal") + found_path = fm.findfont(fm.FontProperties(family=family, style="normal")) + if family in found_path: + mono = fm.FontProperties(fname=found_path, size=fontsize, weight=weight) + mono_sm = fm.FontProperties(fname=found_path, size=fontsize, weight=weight) + return mono, mono_sm + + # Final fallback: probe for the fallback font file + mono_bold_path = fm.findfont( + fm.FontProperties(family="monospace", weight="bold", style="normal") + ) + if "Oblique" in mono_bold_path or "Italic" in mono_bold_path: + import glob as _gl + + candidates = _gl.glob( + str(Path(fm.findfont("monospace")).parent / FONT_FALLBACK_FILENAME) + ) + if candidates: + mono_bold_path = candidates[0] + + mono = fm.FontProperties(fname=mono_bold_path, size=fontsize) + mono_sm = fm.FontProperties(fname=mono_bold_path, size=fontsize) + return mono, mono_sm + + +def render_panels( + panels: list[Panel], + out_path: str | Path = "alignment.png", + fontsize: float = 12, + dpi: int = 600, + palette: str = "nt", + cell: float | None = None, + classic: bool = False, +) -> None: + """Render alignment panels to a publication-quality image file. + + Each panel is drawn as a reference row followed by read rows. Matches + are shown as dots (forward) or commas (reverse), mismatches are + highlighted with colored backgrounds, and insertion columns are shaded. + + Args: + panels: List of Panel objects to render vertically. + out_path: Output image path (format inferred from extension). + fontsize: Font size in points for base characters. + dpi: Output resolution in dots per inch. + palette: Color scheme, either ``"nt"`` for nucleotides or ``"aa"`` for amino acids. + cell: Cell size in inches. Defaults to fontsize / 72. + classic: When True, render in black-and-white with no color highlighting. + """ + if cell is None: + cell = fontsize / 72 # 1 pt = 1/72 inch -> cell fits one character + colors = AA_COLORS if palette == "aa" else NT_COLORS + + if classic: + colors = {k: "#000000" for k in colors} + mismatch_bg = "#FFFFFF00" + ins_bg = "#FFFFFF00" + else: + mismatch_bg = MISMATCH_BG + ins_bg = INS_BG + mono, mono_sm = _resolve_font(fontsize) + + # Compute total height: for each panel, 1 ref row + N seq rows + separator + max_cols = max(p.total_cols for p in panels) + total_rows = 0 + panel_y_offsets: list[int] = [] + for i, p in enumerate(panels): + panel_y_offsets.append(total_rows) + total_rows += 1 + len(p.seq_rows) + if i < len(panels) - 1: + total_rows += 1 + + fig_w = max(4, max_cols * cell + 0.5) + fig_h = max(1.0, total_rows * cell + 0.6) + fig, ax = plt.subplots(figsize=(fig_w, fig_h)) + ax.set_xlim(-0.5, max_cols - 0.5) + ax.set_ylim(total_rows - 0.5, -0.5) + ax.set_aspect("equal") + + for pi, panel in enumerate(panels): + y0 = panel_y_offsets[pi] + n_panel_rows = 1 + len(panel.seq_rows) + + # Shade insertion columns + for ic in panel.ins_columns: + ax.add_patch( + plt.Rectangle( + (ic - 0.5, y0 - 0.5), + 1, + n_panel_rows, + facecolor=ins_bg, + edgecolor="none", + zorder=0, + ) + ) + + # Reference row + for c, base in enumerate(panel.ref_row): + clr = TEXT_COLOR if base == "-" else colors.get(base, FALLBACK_BASE_COLOR) + ax.text( + c, y0, base, ha="center", va="center", fontproperties=mono, color=clr + ) + + # Sequence rows + for ri, (name, row, is_rev) in enumerate(panel.seq_rows): + y = y0 + 1 + ri + alpha = REV_ALPHA if is_rev else FWD_ALPHA + strand_char = "," if is_rev else "." + + for c, base in enumerate(row): + if base == " ": + continue + ref_base = panel.ref_row[c] if c < len(panel.ref_row) else "-" + + if base == "-": + ax.text( + c, + y, + "-", + ha="center", + va="center", + fontproperties=mono, + color=TEXT_COLOR, + alpha=alpha, + ) + elif base == ref_base: + ax.text( + c, + y, + strand_char, + ha="center", + va="center", + fontproperties=mono, + color=TEXT_COLOR, + alpha=alpha, + ) + else: + ax.add_patch( + plt.Rectangle( + (c - 0.5, y - 0.5), + 1, + 1, + facecolor=mismatch_bg, + edgecolor="none", + ) + ) + display = base.lower() if is_rev else base + ax.text( + c, + y, + display, + ha="center", + va="center", + fontproperties=mono, + color=colors.get(base, TEXT_COLOR), + alpha=alpha, + ) + + # Panel label (left side) + if len(panels) > 1: + ax.text( + -1.5, + y0 + n_panel_rows / 2 - 0.5, + panel.label, + ha="right", + va="center", + fontproperties=mono, + color=PANEL_LABEL_COLOR, + ) + + # Separator line between panels + if pi < len(panels) - 1: + sep_y = y0 + n_panel_rows + 0.0 + ax.axhline(y=sep_y, color=SEPARATOR_COLOR, lw=0.5, ls="-", xmin=0, xmax=1) + + # X-axis from first panel, placed on top + ax.xaxis.set_label_position("top") + ax.xaxis.tick_top() + first = panels[0] + tick_idx = [ci for ci, _ in first.col_labels] + tick_lbl = [lb for _, lb in first.col_labels] + ax.set_xticks(tick_idx) + ax.set_xticklabels(tick_lbl, rotation=0, ha="center", fontproperties=mono_sm) + ax.set_yticks([]) + ax.tick_params(axis="x", length=0, pad=2) + ax.tick_params(axis="y", length=0) + for spine in ax.spines.values(): + spine.set_visible(False) + + plt.subplots_adjust(left=0.01, right=0.99, top=0.92, bottom=0.01) + plt.savefig( + out_path, + dpi=dpi, + bbox_inches="tight", + pad_inches=0.05, + facecolor="white", + transparent=False, + ) + plt.close() + print(f"Saved: {out_path} ({dpi} dpi, {len(panels)} panel(s), " f"{max_cols} cols)") diff --git a/src/tview/style.yaml b/src/tview/style.yaml new file mode 100644 index 0000000..a12c88d --- /dev/null +++ b/src/tview/style.yaml @@ -0,0 +1,11 @@ +alpha: + forward: 1.0 + reverse: 0.85 + +font: + preferences: + - family: "Helvetica" + weight: "bold" + - family: "monospace" + weight: "bold" + fallback_filename: "DejaVuSansMono-Bold.ttf" diff --git a/src/tview/tview.py b/src/tview/tview.py index 6cdd2f9..fab26d4 100644 --- a/src/tview/tview.py +++ b/src/tview/tview.py @@ -1,656 +1,11 @@ -#!/usr/bin/env python3 -""" -tview_image.py -- Publication-quality alignment viewer. - -Supports BAM files (with ref FASTA) and pre-aligned FASTA (e.g. MAFFT output). -Multiple inputs can be stacked vertically in a single figure. - -Usage: - # Single BAM - python tview_image.py --bam sample.bam --ref ref.fa --region chr1:1-50 -o out.png - - # Stacked BAMs (shared ref + region) - python tview_image.py --bam a.bam b.bam --ref ref.fa --region chr1:1-50 -o out.png - - # Aligned FASTA (first sequence = reference) - python tview_image.py --fasta aligned.fasta -o out.png --palette aa - - # Stacked FASTAs - python tview_image.py --fasta group1.fasta group2.fasta -o out.png --palette aa +"""Backward-compatible re-export shim. - # FASTA with column range (1-based, inclusive) - python tview_image.py --fasta aligned.fasta --columns 1-120 -o out.png - - # Mix BAM and FASTA panels - python tview_image.py --bam a.bam --ref ref.fa --region chr1:1-50 --fasta aln.fasta -o out.png +All logic has moved to focused modules: models, fasta, bam, renderer. +This file preserves ``from tview.tview import ...`` for existing code. """ -from __future__ import annotations - -from collections import defaultdict -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any - -import matplotlib - -matplotlib.use("Agg") -import matplotlib.font_manager as fm -import matplotlib.pyplot as plt - -# -- Color schemes ----------------------------------------------------- -NT_COLORS = { - "A": "#4CAF50", - "C": "#2196F3", - "G": "#FF9800", - "T": "#F44336", - "N": "#9E9E9E", - "-": "#9E9E9E", -} -AA_COLORS = { - "A": "#2196F3", - "V": "#2196F3", - "L": "#2196F3", - "I": "#2196F3", - "M": "#2196F3", - "F": "#2196F3", - "W": "#2196F3", - "P": "#2196F3", - "K": "#F44336", - "R": "#F44336", - "H": "#F44336", - "D": "#E040FB", - "E": "#E040FB", - "S": "#4CAF50", - "T": "#4CAF50", - "N": "#4CAF50", - "Q": "#4CAF50", - "G": "#FF9800", - "C": "#FF9800", - "Y": "#FF9800", - "*": "#9E9E9E", - "-": "#9E9E9E", - "X": "#9E9E9E", -} -MISMATCH_BG = "#FFEB3B55" -INS_BG = "#CE93D833" -GAP_COLOR = "#9E9E9E" -FWD_ALPHA = 1.0 -REV_ALPHA = 0.85 -SEPARATOR_COLOR = "#BDBDBD" - -# -- CIGAR operations -------------------------------------------------- -CIGAR_MATCH = 0 # M -CIGAR_INS = 1 # I -CIGAR_DEL = 2 # D -CIGAR_REF_SKIP = 3 # N -CIGAR_SOFT_CLIP = 4 # S -CIGAR_SEQ_MATCH = 7 # = -CIGAR_SEQ_MISMATCH = 8 # X - - -# ====================================================================== -# Data structures -- each input becomes a Panel -# ====================================================================== -@dataclass -class Panel: - """One horizontal alignment block: a reference row + read/sequence rows. - - Attributes: - label: Display name for the panel (e.g. filename stem). - ref_row: Reference sequence as a list of single-character strings. - seq_rows: Read/sequence rows as (name, bases, is_reverse) tuples. - total_cols: Total number of display columns including insertion columns. - col_labels: Tick positions and labels for the x-axis as (column_index, label) pairs. - ins_columns: Column indices that represent insertion positions. - - Examples: - >>> p = Panel("test", ["A", "C"], [("r1", ["A", "T"], False)], 2, [(0, "1")]) - >>> p.label - 'test' - >>> p.ins_columns - set() - """ - - label: str - ref_row: list[str] - seq_rows: list[tuple[str, list[str], bool]] - total_cols: int - col_labels: list[tuple[int, str]] - ins_columns: set[int] | None = None - - def __post_init__(self) -> None: - if self.ins_columns is None: - self.ins_columns = set() - - -# ====================================================================== -# FASTA panel builder -# ====================================================================== -def read_fasta(path: str | Path) -> list[tuple[str, str]]: - """Parse a FASTA file into a list of (name, sequence) tuples. - - Args: - path: Path to the FASTA file. - - Returns: - List of (header_name, concatenated_sequence) tuples. - - Examples: - >>> fasta = tmp_path / "test.fa" - >>> _ = fasta.write_text(">seq1\\nACGT\\n>seq2\\nTGCA\\n") - >>> read_fasta(fasta) - [('seq1', 'ACGT'), ('seq2', 'TGCA')] - >>> read_fasta(tmp_path / "empty.fa") - Traceback (most recent call last): - ... - FileNotFoundError: ... - """ - seqs: list[tuple[str, str]] = [] - name: str | None = None - buf: list[str] = [] - with open(path) as fh: - for line in fh: - if line.startswith(">"): - if name is not None: - seqs.append((name, "".join(buf))) - name = line[1:].strip() - buf = [] - else: - buf.append(line.strip()) - if name is not None: - seqs.append((name, "".join(buf))) - return seqs - - -def fasta_panel( - path: str | Path, - col_start: int | None = None, - col_end: int | None = None, -) -> Panel: - """Build a Panel from an aligned FASTA where the first sequence is the reference. - - Args: - path: Path to the aligned FASTA file. - col_start: 1-based inclusive start column for slicing the alignment. - col_end: 1-based inclusive end column for slicing the alignment. - - Returns: - A Panel with reference row, sequence rows, and column labels. - - Raises: - ValueError: If the FASTA file contains no sequences. - - Examples: - >>> fasta = tmp_path / "aln.fa" - >>> _ = fasta.write_text(">ref\\nACGT\\n>read1\\nACTT\\n>read2\\nA-GT\\n") - >>> p = fasta_panel(fasta) - >>> p.ref_row - ['A', 'C', 'G', 'T'] - >>> p.total_cols - 4 - >>> len(p.seq_rows) - 2 - >>> p.seq_rows[0] - ('read1', ['A', 'C', 'T', 'T'], False) - >>> p2 = fasta_panel(fasta, col_start=2, col_end=3) - >>> p2.ref_row - ['C', 'G'] - """ - seqs = read_fasta(path) - if not seqs: - raise ValueError(f"No sequences in {path}") - - _ref_name, ref_seq = seqs[0] - - # Slice columns if requested (1-based inclusive) - if col_start is not None or col_end is not None: - cs = (col_start or 1) - 1 - ce = col_end or len(ref_seq) - ref_seq = ref_seq[cs:ce] - seqs = [(n, s[cs:ce]) for n, s in seqs] - - aln_len = len(ref_seq) - ref_row = list(ref_seq.upper()) - - seq_rows: list[tuple[str, list[str], bool]] = [] - for name, seq in seqs[1:]: - row = list(seq.upper()[:aln_len]) - row += ["-"] * (aln_len - len(row)) - seq_rows.append((name, row, False)) - - # Column labels: 1-based position in the reference (skip gap columns) - col_labels: list[tuple[int, str]] = [] - ref_pos = 0 - for i, base in enumerate(ref_row): - if base != "-": - ref_pos += 1 - if ref_pos == 1 or ref_pos % 10 == 0: - col_labels.append((i, str(ref_pos))) - - label = Path(path).stem - return Panel(label, ref_row, seq_rows, aln_len, col_labels) - - -# ====================================================================== -# BAM panel builder -# ====================================================================== -def build_read_row( - read: Any, - ref_start: int, - ref_end: int, -) -> tuple[dict[int, str], dict[int, list[str]]]: - """Extract aligned bases and insertions from a single pysam read. - - Walks the CIGAR string to map query bases onto reference positions, - collecting insertions keyed by their anchor reference position. - - Args: - read: A pysam.AlignedSegment with cigartuples and query_sequence. - ref_start: 0-based start of the reference window (inclusive). - ref_end: 0-based end of the reference window (exclusive). - - Returns: - A tuple of (aligned, inserts) where aligned maps ref positions to - bases and inserts maps ref positions to lists of inserted bases. - """ - aligned: dict[int, str] = {} - inserts: dict[int, list[str]] = defaultdict(list) - qpos, rpos = 0, read.reference_start - for op, length in read.cigartuples: - if op in (CIGAR_MATCH, CIGAR_SEQ_MATCH, CIGAR_SEQ_MISMATCH): - for _ in range(length): - if ref_start <= rpos < ref_end: - aligned[rpos] = read.query_sequence[qpos].upper() - qpos += 1 - rpos += 1 - elif op == CIGAR_INS: - anchor = rpos - 1 - if ref_start <= anchor < ref_end: - for j in range(length): - inserts[anchor].append(read.query_sequence[qpos + j].upper()) - qpos += length - elif op == CIGAR_DEL: - for _ in range(length): - if ref_start <= rpos < ref_end: - aligned[rpos] = "-" - rpos += 1 - elif op == CIGAR_REF_SKIP: - rpos += length - elif op == CIGAR_SOFT_CLIP: - qpos += length - return aligned, inserts - - -def bam_panel(bam_path: str | Path, ref_path: str | Path, region: str) -> Panel: - """Build a Panel from a BAM file with reference FASTA and genomic region. - - Reads are sorted by start position and strand. Insertion columns are - expanded so all reads align on a common grid. - - Args: - bam_path: Path to the indexed BAM file. - ref_path: Path to the reference FASTA (must be indexed). - region: Genomic region string in "chrom:start-end" format (0-based start). - - Returns: - A Panel with reference row, read rows, insertion columns, and tick labels. - """ - import pysam - - chrom, rest = region.split(":") - start, end = [int(x) for x in rest.replace(",", "").split("-")] - - with pysam.FastaFile(ref_path) as fasta: - ref_seq = fasta.fetch(chrom, start, end).upper() - - with pysam.AlignmentFile(bam_path, "rb") as samfile: - reads = [ - r - for r in samfile.fetch(chrom, start, end) - if not r.is_unmapped and r.cigartuples - ] - reads.sort(key=lambda r: (r.reference_start, r.is_reverse)) - - # Find max insertion at each ref position - max_ins: dict[int, int] = defaultdict(int) - read_data = [] - for read in reads: - aligned, inserts = build_read_row(read, start, end) - read_data.append((read, aligned, inserts)) - for rpos, bases in inserts.items(): - max_ins[rpos] = max(max_ins[rpos], len(bases)) - - # Build column map - col_map: dict[int, int] = {} - ins_col_set: set[int] = set() - col = 0 - for rpos in range(start, end): - col_map[rpos] = col - col += 1 - n_ins = max_ins.get(rpos, 0) - for j in range(n_ins): - ins_col_set.add(col + j) - col += n_ins - total_cols = col - - # Build ref row with '-' in insertion columns - ref_row: list[str] = [] - for rpos in range(start, end): - ref_row.append(ref_seq[rpos - start]) - for _ in range(max_ins.get(rpos, 0)): - ref_row.append("-") - - # Build sequence rows - seq_rows: list[tuple[str, list[str], bool]] = [] - for read, aligned, inserts in read_data: - row = [" "] * total_cols - for rpos in range(start, end): - c = col_map[rpos] - if rpos in aligned: - row[c] = aligned[rpos] - if rpos in aligned or rpos in inserts: - n_ins = max_ins.get(rpos, 0) - read_ins = inserts.get(rpos, []) - for j in range(n_ins): - if j < len(read_ins): - row[c + 1 + j] = read_ins[j] - else: - row[c + 1 + j] = "-" - seq_rows.append((read.query_name, row, read.is_reverse)) - - # Column labels: 1-based relative, ticks at 1, 10, 20... - ref_width = end - start - tick_1based = [1] + list(range(10, ref_width + 1, 10)) - col_labels = [ - (col_map[start + p - 1], str(p)) for p in tick_1based if (start + p - 1) < end - ] - - label = Path(bam_path).stem - return Panel(label, ref_row, seq_rows, total_cols, col_labels, ins_col_set) - - -# ====================================================================== -# Renderer -# ====================================================================== -def _resolve_font( - fontsize: float, -) -> tuple[fm.FontProperties, fm.FontProperties]: - """Resolve monospace font for alignment rendering. - - Prefers Helvetica Bold (macOS) and falls back to DejaVu Sans Mono Bold. - - Args: - fontsize: Font size in points. - - Returns: - A tuple of (mono, mono_sm) FontProperties for base text and tick labels. - """ - helv_path = fm.findfont(fm.FontProperties(family="Helvetica", style="normal")) - if "Helvetica" in helv_path: - mono = fm.FontProperties(fname=helv_path, size=fontsize, weight="bold") - mono_sm = fm.FontProperties(fname=helv_path, size=fontsize, weight="bold") - return mono, mono_sm - - mono_bold_path = fm.findfont( - fm.FontProperties(family="monospace", weight="bold", style="normal") - ) - if "Oblique" in mono_bold_path or "Italic" in mono_bold_path: - import glob as _gl - - candidates = _gl.glob( - str(Path(fm.findfont("monospace")).parent / "DejaVuSansMono-Bold.ttf") - ) - if candidates: - mono_bold_path = candidates[0] - - mono = fm.FontProperties(fname=mono_bold_path, size=fontsize) - mono_sm = fm.FontProperties(fname=mono_bold_path, size=fontsize) - return mono, mono_sm - - -def render_panels( - panels: list[Panel], - out_path: str | Path = "alignment.png", - fontsize: float = 12, - dpi: int = 600, - palette: str = "nt", - cell: float | None = None, -) -> None: - """Render alignment panels to a publication-quality image file. - - Each panel is drawn as a reference row followed by read rows. Matches - are shown as dots (forward) or commas (reverse), mismatches are - highlighted with colored backgrounds, and insertion columns are shaded. - - Args: - panels: List of Panel objects to render vertically. - out_path: Output image path (format inferred from extension). - fontsize: Font size in points for base characters. - dpi: Output resolution in dots per inch. - palette: Color scheme, either ``"nt"`` for nucleotides or ``"aa"`` for amino acids. - cell: Cell size in inches. Defaults to fontsize / 72. - """ - if cell is None: - cell = fontsize / 72 # 1 pt = 1/72 inch -> cell fits one character - colors = AA_COLORS if palette == "aa" else NT_COLORS - mono, mono_sm = _resolve_font(fontsize) - - # Compute total height: for each panel, 1 ref row + N seq rows + separator - max_cols = max(p.total_cols for p in panels) - total_rows = 0 - panel_y_offsets: list[int] = [] - for i, p in enumerate(panels): - panel_y_offsets.append(total_rows) - total_rows += 1 + len(p.seq_rows) - if i < len(panels) - 1: - total_rows += 1 - - fig_w = max(4, max_cols * cell + 0.5) - fig_h = max(1.0, total_rows * cell + 0.6) - fig, ax = plt.subplots(figsize=(fig_w, fig_h)) - ax.set_xlim(-0.5, max_cols - 0.5) - ax.set_ylim(total_rows - 0.5, -0.5) - ax.set_aspect("equal") - - for pi, panel in enumerate(panels): - y0 = panel_y_offsets[pi] - n_panel_rows = 1 + len(panel.seq_rows) - - # Shade insertion columns - for ic in panel.ins_columns: - ax.add_patch( - plt.Rectangle( - (ic - 0.5, y0 - 0.5), - 1, - n_panel_rows, - facecolor=INS_BG, - edgecolor="none", - zorder=0, - ) - ) - - # Reference row - for c, base in enumerate(panel.ref_row): - clr = "#000000" if base == "-" else colors.get(base, "#9E9E9E") - ax.text( - c, y0, base, ha="center", va="center", fontproperties=mono, color=clr - ) - - # Sequence rows - for ri, (name, row, is_rev) in enumerate(panel.seq_rows): - y = y0 + 1 + ri - alpha = REV_ALPHA if is_rev else FWD_ALPHA - strand_char = "," if is_rev else "." - - for c, base in enumerate(row): - if base == " ": - continue - ref_base = panel.ref_row[c] if c < len(panel.ref_row) else "-" - - if base == "-": - ax.text( - c, - y, - "-", - ha="center", - va="center", - fontproperties=mono, - color="#000000", - alpha=alpha, - ) - elif base == ref_base: - ax.text( - c, - y, - strand_char, - ha="center", - va="center", - fontproperties=mono, - color="#000000", - alpha=alpha, - ) - else: - ax.add_patch( - plt.Rectangle( - (c - 0.5, y - 0.5), - 1, - 1, - facecolor=MISMATCH_BG, - edgecolor="none", - ) - ) - display = base.lower() if is_rev else base - ax.text( - c, - y, - display, - ha="center", - va="center", - fontproperties=mono, - color=colors.get(base, "#000000"), - alpha=alpha, - ) - - # Panel label (left side) - if len(panels) > 1: - ax.text( - -1.5, - y0 + n_panel_rows / 2 - 0.5, - panel.label, - ha="right", - va="center", - fontproperties=mono, - color="#616161", - ) - - # Separator line between panels - if pi < len(panels) - 1: - sep_y = y0 + n_panel_rows + 0.0 - ax.axhline(y=sep_y, color=SEPARATOR_COLOR, lw=0.5, ls="-", xmin=0, xmax=1) - - # X-axis from first panel, placed on top - ax.xaxis.set_label_position("top") - ax.xaxis.tick_top() - first = panels[0] - tick_idx = [ci for ci, _ in first.col_labels] - tick_lbl = [lb for _, lb in first.col_labels] - ax.set_xticks(tick_idx) - ax.set_xticklabels(tick_lbl, rotation=0, ha="center", fontproperties=mono_sm) - ax.set_yticks([]) - ax.tick_params(axis="x", length=0, pad=2) - ax.tick_params(axis="y", length=0) - for spine in ax.spines.values(): - spine.set_visible(False) - - plt.subplots_adjust(left=0.01, right=0.99, top=0.92, bottom=0.01) - plt.savefig( - out_path, - dpi=dpi, - bbox_inches="tight", - pad_inches=0.05, - facecolor="white", - transparent=False, - ) - plt.close() - print(f"Saved: {out_path} ({dpi} dpi, {len(panels)} panel(s), " f"{max_cols} cols)") - - -# ====================================================================== -# Doctest infrastructure -# ====================================================================== -def _run_doctests() -> int: - """Run doctests with temporary file fixtures. - - Returns: - Exit code: 0 if all pass, 1 if any fail. - """ - import doctest - import shutil - import sys - import tempfile - - tmp_dir = tempfile.mkdtemp() - tmp_path = Path(tmp_dir) - - try: - globs = { - "Path": Path, - "tmp_path": tmp_path, - } - results = doctest.testmod( - extraglobs=globs, - verbose="-v" in sys.argv, - optionflags=doctest.ELLIPSIS, - ) - - if results.failed == 0: - print(f"All {results.attempted} doctests passed.") - return 0 - else: - print(f"{results.failed}/{results.attempted} doctests failed.") - return 1 - finally: - shutil.rmtree(tmp_dir, ignore_errors=True) - - -def _should_skip_tests() -> bool: - """Check if tests should be skipped via flag or env var.""" - import os - import sys - - if "--skip-tests" in sys.argv: - sys.argv.remove("--skip-tests") - return True - if os.environ.get("DOCSTR_SKIP_TEST", "").lower() in ("1", "true", "yes"): - return True - return False - - -def _wants_test_only() -> bool: - """Check if user wants to run tests only (not CLI).""" - import sys - - for flag in ("--test", "--tests"): - if flag in sys.argv: - sys.argv.remove(flag) - return True - return False - - -if __name__ == "__main__": - import sys - - if _wants_test_only(): - sys.exit(_run_doctests()) - - if not _should_skip_tests(): - exit_code = _run_doctests() - if exit_code != 0: - print("Aborting: Fix failing doctests before running.") - sys.exit(exit_code) - print() - - from tview.cli import main - - main() +from tview.bam import bam_panel, build_read_row # noqa: F401 +from tview.config import AA_COLORS, NT_COLORS # noqa: F401 +from tview.fasta import fasta_panel, read_fasta # noqa: F401 +from tview.models import Panel # noqa: F401 +from tview.renderer import render_panels # noqa: F401 diff --git a/tests/output/all_gaps.png b/tests/output/all_gaps.png index 06ea05d..8ac4108 100644 Binary files a/tests/output/all_gaps.png and b/tests/output/all_gaps.png differ diff --git a/tests/output/all_matches.png b/tests/output/all_matches.png index e27112c..72e796b 100644 Binary files a/tests/output/all_matches.png and b/tests/output/all_matches.png differ diff --git a/tests/output/classic_mode_aa.png b/tests/output/classic_mode_aa.png new file mode 100644 index 0000000..f37b188 Binary files /dev/null and b/tests/output/classic_mode_aa.png differ diff --git a/tests/output/classic_mode_nt.png b/tests/output/classic_mode_nt.png new file mode 100644 index 0000000..a903722 Binary files /dev/null and b/tests/output/classic_mode_nt.png differ diff --git a/tests/output/cli_columns_1-8.png b/tests/output/cli_columns_1-8.png index 5a9c6c2..40b4498 100644 Binary files a/tests/output/cli_columns_1-8.png and b/tests/output/cli_columns_1-8.png differ diff --git a/tests/output/cli_fasta_mode.png b/tests/output/cli_fasta_mode.png index 5a9c6c2..40b4498 100644 Binary files a/tests/output/cli_fasta_mode.png and b/tests/output/cli_fasta_mode.png differ diff --git a/tests/output/cli_palette_aa.png b/tests/output/cli_palette_aa.png index 88fe27f..2ab698d 100644 Binary files a/tests/output/cli_palette_aa.png and b/tests/output/cli_palette_aa.png differ diff --git a/tests/output/dense_nt_mismatches.png b/tests/output/dense_nt_mismatches.png index 251a96b..07a0a53 100644 Binary files a/tests/output/dense_nt_mismatches.png and b/tests/output/dense_nt_mismatches.png differ diff --git a/tests/output/format_test.pdf b/tests/output/format_test.pdf index 512e1a8..eca4684 100644 Binary files a/tests/output/format_test.pdf and b/tests/output/format_test.pdf differ diff --git a/tests/output/format_test.svg b/tests/output/format_test.svg index a0e03e8..8e21e5a 100644 --- a/tests/output/format_test.svg +++ b/tests/output/format_test.svg @@ -6,11 +6,11 @@ - 2026-02-24T20:54:28.496688 + 2026-02-26T21:16:44.556607 image/svg+xml - Matplotlib v3.10.6, https://matplotlib.org/ + Matplotlib v3.10.8, https://matplotlib.org/ @@ -43,7 +43,7 @@ L 134.64 49.469375 L 134.64 82.229375 L 101.88 82.229375 z -" clip-path="url(#pf46b9afcc1)" style="fill: #ffeb3b; fill-opacity: 0.333333"/> +" clip-path="url(#pba91d56d39)" style="fill: #ffeb3b; fill-opacity: 0.333333"/> @@ -218,7 +218,7 @@ z - + diff --git a/tests/output/hiv_env_realistic.png b/tests/output/hiv_env_realistic.png index b03ce99..bcc45e7 100644 Binary files a/tests/output/hiv_env_realistic.png and b/tests/output/hiv_env_realistic.png differ diff --git a/tests/output/hypothesis_aa_001.png b/tests/output/hypothesis_aa_001.png index d3ca629..6b8c679 100644 Binary files a/tests/output/hypothesis_aa_001.png and b/tests/output/hypothesis_aa_001.png differ diff --git a/tests/output/hypothesis_aa_002.png b/tests/output/hypothesis_aa_002.png index f33fa8c..de21b08 100644 Binary files a/tests/output/hypothesis_aa_002.png and b/tests/output/hypothesis_aa_002.png differ diff --git a/tests/output/hypothesis_aa_003.png b/tests/output/hypothesis_aa_003.png index 47b8c1a..e04b01a 100644 Binary files a/tests/output/hypothesis_aa_003.png and b/tests/output/hypothesis_aa_003.png differ diff --git a/tests/output/hypothesis_aa_004.png b/tests/output/hypothesis_aa_004.png index 9e2f9b7..02c4927 100644 Binary files a/tests/output/hypothesis_aa_004.png and b/tests/output/hypothesis_aa_004.png differ diff --git a/tests/output/hypothesis_aa_005.png b/tests/output/hypothesis_aa_005.png index c6781a9..bbfe74d 100644 Binary files a/tests/output/hypothesis_aa_005.png and b/tests/output/hypothesis_aa_005.png differ diff --git a/tests/output/hypothesis_aa_006.png b/tests/output/hypothesis_aa_006.png index 99703f7..383bf9f 100644 Binary files a/tests/output/hypothesis_aa_006.png and b/tests/output/hypothesis_aa_006.png differ diff --git a/tests/output/hypothesis_aa_007.png b/tests/output/hypothesis_aa_007.png index 794e2d4..e6d3bae 100644 Binary files a/tests/output/hypothesis_aa_007.png and b/tests/output/hypothesis_aa_007.png differ diff --git a/tests/output/hypothesis_aa_008.png b/tests/output/hypothesis_aa_008.png index 6fe0fd8..b1fbeeb 100644 Binary files a/tests/output/hypothesis_aa_008.png and b/tests/output/hypothesis_aa_008.png differ diff --git a/tests/output/hypothesis_aa_009.png b/tests/output/hypothesis_aa_009.png index 9ea70e3..72ec45d 100644 Binary files a/tests/output/hypothesis_aa_009.png and b/tests/output/hypothesis_aa_009.png differ diff --git a/tests/output/hypothesis_aa_010.png b/tests/output/hypothesis_aa_010.png index 8abe908..c7e294c 100644 Binary files a/tests/output/hypothesis_aa_010.png and b/tests/output/hypothesis_aa_010.png differ diff --git a/tests/output/hypothesis_nt_001.png b/tests/output/hypothesis_nt_001.png index 586de7e..6804928 100644 Binary files a/tests/output/hypothesis_nt_001.png and b/tests/output/hypothesis_nt_001.png differ diff --git a/tests/output/hypothesis_nt_002.png b/tests/output/hypothesis_nt_002.png index 11d1bdc..d0b3d23 100644 Binary files a/tests/output/hypothesis_nt_002.png and b/tests/output/hypothesis_nt_002.png differ diff --git a/tests/output/hypothesis_nt_003.png b/tests/output/hypothesis_nt_003.png index de9bc5c..5273eea 100644 Binary files a/tests/output/hypothesis_nt_003.png and b/tests/output/hypothesis_nt_003.png differ diff --git a/tests/output/hypothesis_nt_004.png b/tests/output/hypothesis_nt_004.png index 7366856..abe6176 100644 Binary files a/tests/output/hypothesis_nt_004.png and b/tests/output/hypothesis_nt_004.png differ diff --git a/tests/output/hypothesis_nt_005.png b/tests/output/hypothesis_nt_005.png index 59ce984..57c3987 100644 Binary files a/tests/output/hypothesis_nt_005.png and b/tests/output/hypothesis_nt_005.png differ diff --git a/tests/output/hypothesis_nt_006.png b/tests/output/hypothesis_nt_006.png index 558c053..116c4e1 100644 Binary files a/tests/output/hypothesis_nt_006.png and b/tests/output/hypothesis_nt_006.png differ diff --git a/tests/output/hypothesis_nt_007.png b/tests/output/hypothesis_nt_007.png index adcc889..51db65a 100644 Binary files a/tests/output/hypothesis_nt_007.png and b/tests/output/hypothesis_nt_007.png differ diff --git a/tests/output/hypothesis_nt_008.png b/tests/output/hypothesis_nt_008.png index e2e11e5..a1f1499 100644 Binary files a/tests/output/hypothesis_nt_008.png and b/tests/output/hypothesis_nt_008.png differ diff --git a/tests/output/hypothesis_nt_009.png b/tests/output/hypothesis_nt_009.png index 8505448..ab47185 100644 Binary files a/tests/output/hypothesis_nt_009.png and b/tests/output/hypothesis_nt_009.png differ diff --git a/tests/output/hypothesis_nt_010.png b/tests/output/hypothesis_nt_010.png index 034be50..8d10c27 100644 Binary files a/tests/output/hypothesis_nt_010.png and b/tests/output/hypothesis_nt_010.png differ diff --git a/tests/output/mixed_bam_fasta.png b/tests/output/mixed_bam_fasta.png index 9221e3d..a6a6976 100644 Binary files a/tests/output/mixed_bam_fasta.png and b/tests/output/mixed_bam_fasta.png differ diff --git a/tests/output/stacked_panels.png b/tests/output/stacked_panels.png index 6eab9bf..1f4c017 100644 Binary files a/tests/output/stacked_panels.png and b/tests/output/stacked_panels.png differ diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..a4f563a --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,134 @@ +"""Tests for YAML config loading and backward-compatible constants.""" + +from __future__ import annotations + +import re + +import pytest + +from tview.config import ( + AA_COLORS, + FALLBACK_BASE_COLOR, + FONT_FALLBACK_FILENAME, + FONT_PREFERENCES, + FWD_ALPHA, + GAP_COLOR, + INS_BG, + MISMATCH_BG, + NT_COLORS, + PANEL_LABEL_COLOR, + REV_ALPHA, + SEPARATOR_COLOR, + TEXT_COLOR, + load_palette, + load_style, +) + +HEX_RE = re.compile(r"^#[0-9A-Fa-f]{6}([0-9A-Fa-f]{2})?$") + + +class TestLoadPalette: + def test_has_required_keys(self): + palette = load_palette() + assert "nucleotide" in palette + assert "amino_acid" in palette + assert "rendering" in palette + + def test_nucleotide_keys(self): + palette = load_palette() + nt = palette["nucleotide"] + for base in ("A", "C", "G", "T", "N", "-"): + assert base in nt, f"Missing nucleotide key: {base}" + + def test_amino_acid_keys(self): + palette = load_palette() + aa = palette["amino_acid"] + expected = set("AVLIMFWPKRHDESTNGCYX*-") + assert expected.issubset(set(aa.keys())) + + def test_rendering_keys(self): + palette = load_palette() + rendering = palette["rendering"] + for key in ( + "mismatch_bg", + "insertion_bg", + "gap_color", + "separator_color", + "text_color", + "fallback_base_color", + "panel_label_color", + ): + assert key in rendering, f"Missing rendering key: {key}" + + def test_all_colors_are_hex(self): + palette = load_palette() + for section in ("nucleotide", "amino_acid"): + for key, val in palette[section].items(): + assert HEX_RE.match(val), f"{section}.{key} not valid hex: {val}" + for key, val in palette["rendering"].items(): + assert HEX_RE.match(val), f"rendering.{key} not valid hex: {val}" + + +class TestLoadStyle: + def test_has_required_keys(self): + style = load_style() + assert "alpha" in style + assert "font" in style + + def test_alpha_values_are_floats(self): + style = load_style() + assert isinstance(style["alpha"]["forward"], float) + assert isinstance(style["alpha"]["reverse"], float) + + def test_alpha_range(self): + style = load_style() + for key in ("forward", "reverse"): + val = style["alpha"][key] + assert 0.0 <= val <= 1.0, f"alpha.{key} out of range: {val}" + + def test_font_preferences_structure(self): + style = load_style() + prefs = style["font"]["preferences"] + assert isinstance(prefs, list) + assert len(prefs) >= 1 + for pref in prefs: + assert "family" in pref + + def test_fallback_filename(self): + style = load_style() + assert style["font"]["fallback_filename"].endswith(".ttf") + + +class TestBackwardCompatibleConstants: + def test_nt_colors_values(self): + assert NT_COLORS["A"] == "#4CAF50" + assert NT_COLORS["C"] == "#2196F3" + assert NT_COLORS["G"] == "#FF9800" + assert NT_COLORS["T"] == "#F44336" + assert NT_COLORS["N"] == "#9E9E9E" + assert NT_COLORS["-"] == "#9E9E9E" + + def test_aa_colors_values(self): + assert AA_COLORS["K"] == "#F44336" + assert AA_COLORS["D"] == "#E040FB" + assert AA_COLORS["S"] == "#4CAF50" + assert AA_COLORS["G"] == "#FF9800" + assert AA_COLORS["*"] == "#9E9E9E" + + def test_rendering_constants(self): + assert MISMATCH_BG == "#FFEB3B55" + assert INS_BG == "#CE93D833" + assert GAP_COLOR == "#9E9E9E" + assert SEPARATOR_COLOR == "#BDBDBD" + assert TEXT_COLOR == "#000000" + assert FALLBACK_BASE_COLOR == "#9E9E9E" + assert PANEL_LABEL_COLOR == "#616161" + + def test_alpha_constants(self): + assert FWD_ALPHA == 1.0 + assert REV_ALPHA == 0.85 + + def test_font_constants(self): + assert isinstance(FONT_PREFERENCES, list) + assert FONT_PREFERENCES[0]["family"] == "Helvetica" + assert FONT_FALLBACK_FILENAME == "DejaVuSansMono-Bold.ttf" diff --git a/tests/test_fasta.py b/tests/test_fasta.py index 1bcb1a9..0cf8bb0 100644 --- a/tests/test_fasta.py +++ b/tests/test_fasta.py @@ -10,7 +10,9 @@ from hypothesis import given, settings from hypothesis import strategies as st -from tview.tview import Panel, fasta_panel, read_fasta, render_panels +from tview.fasta import fasta_panel, read_fasta +from tview.models import Panel +from tview.renderer import render_panels from .conftest import OUTPUT_DIR @@ -278,6 +280,26 @@ def test_dense_nt_alignment(self, tmp_path, output_dir): render_panels([panel], str(out), palette="nt", dpi=150) assert out.exists() + def test_classic_mode_nt(self, tmp_path, output_dir): + """Classic mode renders black-and-white NT alignment without crashing.""" + f = tmp_path / "classic_nt.fasta" + f.write_text(">ref\nACGTACGT\n>s1\nACCTACGA\n>s2\nACGTACGT\n") + panel = fasta_panel(str(f)) + out = output_dir / "classic_mode_nt.png" + render_panels([panel], str(out), palette="nt", dpi=150, classic=True) + assert out.exists() + assert out.stat().st_size > 0 + + def test_classic_mode_aa(self, tmp_path, output_dir): + """Classic mode renders black-and-white AA alignment without crashing.""" + f = tmp_path / "classic_aa.fasta" + f.write_text(">ref\nMRVKEKYQ\n>s1\nMRVGEKYQ\n>s2\nMRVKDKYQ\n") + panel = fasta_panel(str(f)) + out = output_dir / "classic_mode_aa.png" + render_panels([panel], str(out), palette="aa", dpi=150, classic=True) + assert out.exists() + assert out.stat().st_size > 0 + # ── Panel dataclass sanity ────────────────────────────────────────