Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ Publication-quality alignment viewer for nucleotide and amino acid sequences. A

Supports **BAM files** (with reference FASTA), **pre-aligned FASTA** (e.g. MAFFT output), and **stacking** multiple inputs into a single figure.

![BAM with indels](examples/indel_alignment.png)
![BAM with indels](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/indel_alignment.png)
*BAM mode — SNP (yellow), 3bp deletion, 2bp insertion (purple columns), reverse-strand insertion*

![FASTA amino acid alignment](examples/fasta_env_1-120.png)
![FASTA amino acid alignment](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/fasta_env_1-120.png)
*FASTA mode — HIV Env protein alignment (HxB2 reference), amino acid palette*

![Stacked BAMs](examples/stacked_bam.png)
![Stacked BAMs](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/stacked_bam.png)
*Stacked mode — two BAM files sharing a reference and region*

---
Expand Down Expand Up @@ -60,6 +60,18 @@ tview \
-o first_120_cols.png
```

### Classic (black-and-white) mode

Use `--classic-mode` for textbook-style monochrome output — all black text on a white background with no colored highlighting. The structural conventions (`.`, `,`, lowercase letters, and `-`) are preserved.

```bash
tview \
--fasta aligned.fasta \
--palette aa \
--classic-mode \
-o classic_output.png
```

---

## Stacking Multiple Panels
Expand Down Expand Up @@ -145,6 +157,10 @@ panels = [
bam_panel("sample2.bam", "ref.fa", "chr1:100-200"),
]
render_panels(panels, "stacked.png", dpi=300, fontsize=7, cell=0.14)

# Classic (black-and-white) mode
panel = fasta_panel("aligned.fasta")
render_panels([panel], "classic.png", palette="aa", classic=True)
```

---
Expand Down Expand Up @@ -205,6 +221,7 @@ Options:
--dpi INTEGER Image resolution. [default: 300]
--fontsize INTEGER Base font size in points. [default: 7]
--cell FLOAT Cell size in inches. [default: 0.14]
--classic-mode Black-and-white rendering with no color highlighting.
-h, --help Show this message and exit.
```

Expand All @@ -220,6 +237,7 @@ Options:
| `--dpi` | Image resolution | `300` |
| `--fontsize` | Base font size in points | `7` |
| `--cell` | Cell size in inches (controls spacing) | `0.14` |
| `--classic-mode` | Black-and-white rendering with no color highlighting | `False` |

---

Expand All @@ -230,6 +248,7 @@ Options:
- Use `--fontsize 5` or `6` when displaying wide alignments (>100 columns).
- The output format is determined by the file extension: `.png`, `.pdf`, `.svg` all work.
- For Nature-style figures, `.pdf` or `.svg` output preserves vector text.
- Use `--classic-mode` for textbook-style monochrome figures that reproduce well in grayscale print.

```bash
# Vector output for publication
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ classifiers = [
dependencies = [
"matplotlib>=3.5",
"click>=8.0",
"pyyaml>=6.0",
"pysam>=0.20; sys_platform != 'win32'",
]

Expand All @@ -47,6 +48,9 @@ Issues = "https://github.com/MurrellGroup/tview/issues"
[project.scripts]
tview = "tview.cli:main"

[tool.setuptools.package-data]
tview = ["*.yaml"]

[tool.setuptools.packages.find]
where = ["src"]

Expand Down
14 changes: 5 additions & 9 deletions src/tview/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,11 @@

from importlib.metadata import PackageNotFoundError, version

from tview.tview import (
AA_COLORS,
NT_COLORS,
Panel,
bam_panel,
fasta_panel,
read_fasta,
render_panels,
)
from tview.bam import bam_panel
from tview.config import AA_COLORS, NT_COLORS
from tview.fasta import fasta_panel, read_fasta
from tview.models import Panel
from tview.renderer import render_panels

try:
__version__ = version("tview")
Expand Down
6 changes: 3 additions & 3 deletions src/tview/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = "0.1.dev2+g65baca9ef.d20260225"
__version_tuple__ = version_tuple = (0, 1, "dev2", "g65baca9ef.d20260225")
__version__ = version = "0.1.2.dev1"
__version_tuple__ = version_tuple = (0, 1, 2, "dev1")

__commit_id__ = commit_id = "g65baca9ef"
__commit_id__ = commit_id = "g8d3de9eec"
153 changes: 153 additions & 0 deletions src/tview/bam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
"""BAM parsing and panel construction."""

from __future__ import annotations

from collections import defaultdict
from pathlib import Path
from typing import Any

from tview.models import Panel

# -- CIGAR operations --------------------------------------------------
# Numeric op codes as they appear in the (op, length) pairs that
# build_read_row reads from ``read.cigartuples``. Only the ops that
# build_read_row acts on are named here.
CIGAR_MATCH = 0  # M: aligned column; advances both query and reference
CIGAR_INS = 1  # I: insertion; advances the query only
CIGAR_DEL = 2  # D: deletion; advances the reference only
CIGAR_REF_SKIP = 3  # N: skipped reference span; advances the reference only
CIGAR_SOFT_CLIP = 4  # S: soft clip; advances the query only
CIGAR_SEQ_MATCH = 7  # =: handled exactly like M
CIGAR_SEQ_MISMATCH = 8  # X: handled exactly like M


def build_read_row(
    read: Any,
    ref_start: int,
    ref_end: int,
) -> tuple[dict[int, str], dict[int, list[str]]]:
    """Extract aligned bases and insertions from a single pysam read.

    Walks the CIGAR operations to map query bases onto reference positions,
    collecting insertions keyed by their anchor reference position (the
    reference base immediately before the insertion).

    Args:
        read: An object exposing ``reference_start``, ``cigartuples`` and
            ``query_sequence`` (e.g. a pysam.AlignedSegment).
        ref_start: 0-based start of the reference window (inclusive).
        ref_end: 0-based end of the reference window (exclusive).

    Returns:
        A tuple of (aligned, inserts) where aligned maps ref positions to
        upper-cased bases (``"-"`` for deleted positions) and inserts maps
        anchor ref positions to lists of upper-cased inserted bases. Only
        positions inside ``[ref_start, ref_end)`` are recorded. Both mappings
        are empty when the read carries no stored sequence.
    """
    aligned: dict[int, str] = {}
    inserts: dict[int, list[str]] = defaultdict(list)

    # Read the sequence once instead of once per base.
    seq = read.query_sequence
    if seq is None:
        # Records stored without SEQ have no bases to place; report nothing
        # rather than failing on ``None[...]``.
        return aligned, inserts

    qpos, rpos = 0, read.reference_start
    for op, length in read.cigartuples:
        if op in (CIGAR_MATCH, CIGAR_SEQ_MATCH, CIGAR_SEQ_MISMATCH):
            # Visit only the part of this op that overlaps the window.
            first = max(rpos, ref_start)
            last = min(rpos + length, ref_end)
            shift = qpos - rpos  # query index = reference position + shift
            for pos in range(first, last):
                aligned[pos] = seq[pos + shift].upper()
            qpos += length
            rpos += length
        elif op == CIGAR_INS:
            anchor = rpos - 1
            if ref_start <= anchor < ref_end:
                for j in range(length):
                    inserts[anchor].append(seq[qpos + j].upper())
            qpos += length
        elif op == CIGAR_DEL:
            first = max(rpos, ref_start)
            last = min(rpos + length, ref_end)
            for pos in range(first, last):
                aligned[pos] = "-"
            rpos += length
        elif op == CIGAR_REF_SKIP:
            rpos += length
        elif op == CIGAR_SOFT_CLIP:
            qpos += length
        # Any other op code is ignored and advances neither coordinate.
    return aligned, inserts


def bam_panel(bam_path: str | Path, ref_path: str | Path, region: str) -> Panel:
    """Build a Panel from a BAM file with reference FASTA and genomic region.

    Reads are sorted by start position and strand. Insertion columns are
    expanded so all reads align on a common grid.

    Args:
        bam_path: Path to the indexed BAM file.
        ref_path: Path to the reference FASTA (must be indexed).
        region: Genomic region string in "chrom:start-end" format (0-based
            start). Commas in the coordinates are ignored, and the contig
            name may itself contain ``:`` because only the last colon
            separates it from the coordinates.

    Returns:
        A Panel with reference row, read rows, insertion columns, and tick labels.

    Raises:
        ValueError: If ``region`` is not of the form "chrom:start-end" with
            integer coordinates.
    """
    import pysam

    # Split on the LAST colon so contig names containing ':' still parse.
    try:
        chrom, rest = region.rsplit(":", 1)
        start, end = (int(x) for x in rest.replace(",", "").split("-"))
    except ValueError as err:
        raise ValueError(
            f"Invalid region {region!r}; expected 'chrom:start-end'"
        ) from err

    with pysam.FastaFile(ref_path) as fasta:
        ref_seq = fasta.fetch(chrom, start, end).upper()
    # Never index past the bases actually returned: if fewer came back than
    # requested, shrink the window instead of raising IndexError below.
    end = min(end, start + len(ref_seq))

    with pysam.AlignmentFile(bam_path, "rb") as samfile:
        reads = [
            r
            for r in samfile.fetch(chrom, start, end)
            if not r.is_unmapped and r.cigartuples
        ]
    reads.sort(key=lambda r: (r.reference_start, r.is_reverse))

    # Longest insertion seen after each reference position, across all reads.
    max_ins: dict[int, int] = defaultdict(int)
    read_data = []
    for read in reads:
        aligned, inserts = build_read_row(read, start, end)
        read_data.append((read, aligned, inserts))
        for rpos, bases in inserts.items():
            max_ins[rpos] = max(max_ins[rpos], len(bases))

    # Lay out the grid: one column per reference base, followed by as many
    # insertion columns as the longest insertion anchored at that base. The
    # reference row shows '-' in insertion columns.
    col_map: dict[int, int] = {}
    ins_col_set: set[int] = set()
    ref_row: list[str] = []
    col = 0
    for rpos in range(start, end):
        n_ins = max_ins.get(rpos, 0)
        col_map[rpos] = col
        ref_row.append(ref_seq[rpos - start])
        ref_row.extend("-" * n_ins)
        ins_col_set.update(range(col + 1, col + 1 + n_ins))
        col += 1 + n_ins
    total_cols = col

    # Build sequence rows; columns a read does not touch stay as spaces.
    seq_rows: list[tuple[str, list[str], bool]] = []
    for read, aligned, inserts in read_data:
        row = [" "] * total_cols
        for rpos in range(start, end):
            c = col_map[rpos]
            if rpos in aligned:
                row[c] = aligned[rpos]
            if rpos in aligned or rpos in inserts:
                # Pad with '-' where this read's insertion is shorter than
                # the longest one anchored here (or absent altogether).
                read_ins = inserts.get(rpos, [])
                for j in range(max_ins.get(rpos, 0)):
                    row[c + 1 + j] = read_ins[j] if j < len(read_ins) else "-"
        seq_rows.append((read.query_name, row, read.is_reverse))

    # Column labels: 1-based relative, ticks at 1, 10, 20...
    ref_width = end - start
    tick_1based = [1] + list(range(10, ref_width + 1, 10))
    col_labels = [
        (col_map[start + p - 1], str(p)) for p in tick_1based if (start + p - 1) < end
    ]

    label = Path(bam_path).stem
    return Panel(label, ref_row, seq_rows, total_cols, col_labels, ins_col_set)
22 changes: 19 additions & 3 deletions src/tview/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

import click

from tview.tview import bam_panel, fasta_panel, render_panels
from tview.bam import bam_panel
from tview.fasta import fasta_panel
from tview.renderer import render_panels


def _expand_stdin(paths: list[str]) -> list[str]:
Expand Down Expand Up @@ -65,7 +67,15 @@ def _expand_stdin(paths: list[str]) -> list[str]:
@click.option(
"--cell", type=float, default=0.14, show_default=True, help="Cell size in inches."
)
def main(bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell):
@click.option(
"--classic-mode",
is_flag=True,
default=False,
help="Black-and-white rendering with no color highlighting.",
)
def main(
bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell, classic_mode
):
"""Publication-quality alignment viewer (BAM or FASTA).

Supports BAM files (with reference FASTA), pre-aligned FASTA (e.g. MAFFT
Expand Down Expand Up @@ -95,5 +105,11 @@ def main(bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell)
panels.append(fasta_panel(fasta_path, col_start, col_end))

render_panels(
panels, output, fontsize=fontsize, dpi=dpi, palette=palette, cell=cell
panels,
output,
fontsize=fontsize,
dpi=dpi,
palette=palette,
cell=cell,
classic=classic_mode,
)
57 changes: 57 additions & 0 deletions src/tview/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Configuration loader for tview color palettes and style settings.

Reads palette.yaml and style.yaml from the package directory and exposes
backward-compatible module-level constants.
"""

from __future__ import annotations

from functools import lru_cache
from pathlib import Path
from typing import Any

import yaml

_PKG_DIR = Path(__file__).parent


@lru_cache(maxsize=1)
def load_palette() -> dict[str, Any]:
    """Load color palette definitions from palette.yaml.

    The file is read from the package directory and decoded as UTF-8 so the
    result does not depend on the platform's default locale encoding. The
    parsed result is cached, so every caller receives the same dict object.

    Returns:
        Parsed YAML dict with ``nucleotide``, ``amino_acid``, and ``rendering`` keys.
    """
    with open(_PKG_DIR / "palette.yaml", encoding="utf-8") as fh:
        return yaml.safe_load(fh)


@lru_cache(maxsize=1)
def load_style() -> dict[str, Any]:
    """Load style settings from style.yaml.

    The file is read from the package directory and decoded as UTF-8 so the
    result does not depend on the platform's default locale encoding. The
    parsed result is cached, so every caller receives the same dict object.

    Returns:
        Parsed YAML dict with ``alpha`` and ``font`` keys.
    """
    with open(_PKG_DIR / "style.yaml", encoding="utf-8") as fh:
        return yaml.safe_load(fh)


# -- Backward-compatible constants -----------------------------------------
# Resolved once at import time from the two YAML documents and exposed under
# flat module-level names.
_palette = load_palette()
_style = load_style()

# Per-residue color tables.
NT_COLORS: dict[str, str] = _palette["nucleotide"]
AA_COLORS: dict[str, str] = _palette["amino_acid"]

# Colors taken from the palette's "rendering" section.
_rendering = _palette["rendering"]
MISMATCH_BG: str = _rendering["mismatch_bg"]
INS_BG: str = _rendering["insertion_bg"]
GAP_COLOR: str = _rendering["gap_color"]
SEPARATOR_COLOR: str = _rendering["separator_color"]
TEXT_COLOR: str = _rendering["text_color"]
FALLBACK_BASE_COLOR: str = _rendering["fallback_base_color"]
PANEL_LABEL_COLOR: str = _rendering["panel_label_color"]

# Alpha values for forward and reverse entries of the style file.
_alpha = _style["alpha"]
FWD_ALPHA: float = _alpha["forward"]
REV_ALPHA: float = _alpha["reverse"]

# Font preferences and fallback file name.
_font = _style["font"]
FONT_PREFERENCES: list[dict[str, str]] = _font["preferences"]
FONT_FALLBACK_FILENAME: str = _font["fallback_filename"]
Loading
Loading