Skip to content

Commit 955e72a

Browse files
authored
Merge pull request #1 from tmsincomb/dev
Dev
2 parents 8d3de9e + 94c659f commit 955e72a

48 files changed

Lines changed: 878 additions & 676 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ Publication-quality alignment viewer for nucleotide and amino acid sequences. A
44

55
Supports **BAM files** (with reference FASTA), **pre-aligned FASTA** (e.g. MAFFT output), and **stacking** multiple inputs into a single figure.
66

7-
![BAM with indels](examples/indel_alignment.png)
7+
![BAM with indels](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/indel_alignment.png)
88
*BAM mode — SNP (yellow), 3bp deletion, 2bp insertion (purple columns), reverse-strand insertion*
99

10-
![FASTA amino acid alignment](examples/fasta_env_1-120.png)
10+
![FASTA amino acid alignment](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/fasta_env_1-120.png)
1111
*FASTA mode — HIV Env protein alignment (HxB2 reference), amino acid palette*
1212

13-
![Stacked BAMs](examples/stacked_bam.png)
13+
![Stacked BAMs](https://raw.githubusercontent.com/tmsincomb/tview/main/examples/stacked_bam.png)
1414
*Stacked mode — two BAM files sharing a reference and region*
1515

1616
---
@@ -60,6 +60,18 @@ tview \
6060
-o first_120_cols.png
6161
```
6262

63+
### Classic (black-and-white) mode
64+
65+
Use `--classic-mode` for textbook-style monochrome output — all black text on a white background with no colored highlighting. Structural conventions (`.` `,` lowercase, `-`) are preserved.
66+
67+
```bash
68+
tview \
69+
--fasta aligned.fasta \
70+
--palette aa \
71+
--classic-mode \
72+
-o classic_output.png
73+
```
74+
6375
---
6476

6577
## Stacking Multiple Panels
@@ -145,6 +157,10 @@ panels = [
145157
bam_panel("sample2.bam", "ref.fa", "chr1:100-200"),
146158
]
147159
render_panels(panels, "stacked.png", dpi=300, fontsize=7, cell=0.14)
160+
161+
# Classic (black-and-white) mode
162+
panel = fasta_panel("aligned.fasta")
163+
render_panels([panel], "classic.png", palette="aa", classic=True)
148164
```
149165

150166
---
@@ -205,6 +221,7 @@ Options:
205221
--dpi INTEGER Image resolution. [default: 300]
206222
--fontsize INTEGER Base font size in points. [default: 7]
207223
--cell FLOAT Cell size in inches. [default: 0.14]
224+
--classic-mode Black-and-white rendering with no color highlighting.
208225
-h, --help Show this message and exit.
209226
```
210227

@@ -220,6 +237,7 @@ Options:
220237
| `--dpi` | Image resolution | `300` |
221238
| `--fontsize` | Base font size in points | `7` |
222239
| `--cell` | Cell size in inches (controls spacing) | `0.14` |
240+
| `--classic-mode` | Black-and-white rendering with no color highlighting | `False` |
223241

224242
---
225243

@@ -230,6 +248,7 @@ Options:
230248
- Use `--fontsize 5` or `6` when displaying wide alignments (>100 columns).
231249
- The output format is determined by the file extension: `.png`, `.pdf`, `.svg` all work.
232250
- For Nature-style figures, `.pdf` or `.svg` output preserves vector text.
251+
- Use `--classic-mode` for textbook-style monochrome figures that reproduce well in grayscale print.
233252

234253
```bash
235254
# Vector output for publication

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ classifiers = [
3030
dependencies = [
3131
"matplotlib>=3.5",
3232
"click>=8.0",
33+
"pyyaml>=6.0",
3334
"pysam>=0.20; sys_platform != 'win32'",
3435
]
3536

@@ -47,6 +48,9 @@ Issues = "https://github.com/MurrellGroup/tview/issues"
4748
[project.scripts]
4849
tview = "tview.cli:main"
4950

51+
[tool.setuptools.package-data]
52+
tview = ["*.yaml"]
53+
5054
[tool.setuptools.packages.find]
5155
where = ["src"]
5256

src/tview/__init__.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,11 @@
22

33
from importlib.metadata import PackageNotFoundError, version
44

5-
from tview.tview import (
6-
AA_COLORS,
7-
NT_COLORS,
8-
Panel,
9-
bam_panel,
10-
fasta_panel,
11-
read_fasta,
12-
render_panels,
13-
)
5+
from tview.bam import bam_panel
6+
from tview.config import AA_COLORS, NT_COLORS
7+
from tview.fasta import fasta_panel, read_fasta
8+
from tview.models import Panel
9+
from tview.renderer import render_panels
1410

1511
try:
1612
__version__ = version("tview")

src/tview/_version.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
commit_id: COMMIT_ID
2828
__commit_id__: COMMIT_ID
2929

30-
__version__ = version = "0.1.dev2+g65baca9ef.d20260225"
31-
__version_tuple__ = version_tuple = (0, 1, "dev2", "g65baca9ef.d20260225")
30+
__version__ = version = "0.1.2.dev1"
31+
__version_tuple__ = version_tuple = (0, 1, 2, "dev1")
3232

33-
__commit_id__ = commit_id = "g65baca9ef"
33+
__commit_id__ = commit_id = "g8d3de9eec"

src/tview/bam.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
"""BAM parsing and panel construction."""
2+
3+
from __future__ import annotations
4+
5+
from collections import defaultdict
6+
from pathlib import Path
7+
from typing import Any
8+
9+
from tview.models import Panel
10+
11+
# -- CIGAR operations --------------------------------------------------
# Numeric op codes as they appear in ``read.cigartuples``.
CIGAR_MATCH = 0  # M
CIGAR_INS = 1  # I
CIGAR_DEL = 2  # D
CIGAR_REF_SKIP = 3  # N
CIGAR_SOFT_CLIP = 4  # S
CIGAR_SEQ_MATCH = 7  # =
CIGAR_SEQ_MISMATCH = 8  # X


def build_read_row(
    read: Any,
    ref_start: int,
    ref_end: int,
) -> tuple[dict[int, str], dict[int, list[str]]]:
    """Extract aligned bases and insertions from a single pysam read.

    Walks the CIGAR operations to map query bases onto reference positions,
    collecting insertions keyed by their anchor reference position (the
    reference base immediately before the insertion).

    Args:
        read: An object exposing ``cigartuples``, ``query_sequence`` and
            ``reference_start`` (e.g. a pysam.AlignedSegment).
        ref_start: 0-based start of the reference window (inclusive).
        ref_end: 0-based end of the reference window (exclusive).

    Returns:
        A tuple of (aligned, inserts) where aligned maps ref positions to
        upper-cased bases (``"-"`` for deleted positions) and inserts maps
        anchor ref positions to lists of upper-cased inserted bases. Only
        positions inside ``[ref_start, ref_end)`` are recorded. When the
        read carries no sequence (``query_sequence`` is None), ``"N"`` is
        used as a placeholder for every base instead of raising TypeError.
    """
    aligned: dict[int, str] = {}
    inserts: dict[int, list[str]] = defaultdict(list)
    # Read the sequence once; it may be None for records stored without SEQ.
    seq = read.query_sequence
    qpos, rpos = 0, read.reference_start

    def base_at(index: int) -> str:
        """Return the upper-cased query base at *index*, or 'N' if no SEQ."""
        return "N" if seq is None else seq[index].upper()

    for op, length in read.cigartuples:
        if op in (CIGAR_MATCH, CIGAR_SEQ_MATCH, CIGAR_SEQ_MISMATCH):
            # Consumes both query and reference.
            for offset in range(length):
                pos = rpos + offset
                if ref_start <= pos < ref_end:
                    aligned[pos] = base_at(qpos + offset)
            qpos += length
            rpos += length
        elif op == CIGAR_INS:
            # Consumes query only; anchored to the preceding reference base.
            anchor = rpos - 1
            if ref_start <= anchor < ref_end:
                inserts[anchor].extend(base_at(qpos + j) for j in range(length))
            qpos += length
        elif op == CIGAR_DEL:
            # Consumes reference only; shown as a gap in the read row.
            for pos in range(rpos, rpos + length):
                if ref_start <= pos < ref_end:
                    aligned[pos] = "-"
            rpos += length
        elif op == CIGAR_REF_SKIP:
            # Skipped reference region: advance without recording anything.
            rpos += length
        elif op == CIGAR_SOFT_CLIP:
            # Clipped bases are present in the query but not aligned.
            qpos += length
        # Any other op code is ignored and advances neither position.
    return aligned, inserts
66+
67+
68+
def _parse_region(region: str) -> tuple[str, int, int]:
    """Split a ``chrom:start-end`` string into (chrom, start, end).

    The split is made on the LAST colon so contig names that themselves
    contain ``:`` are kept intact. Thousands separators (``,``) in the
    coordinates are ignored.

    Raises:
        ValueError: If the string is not of the form ``chrom:start-end``.
    """
    chrom, sep, span = region.rpartition(":")
    if not sep or not chrom:
        raise ValueError(f"Invalid region {region!r}: expected 'chrom:start-end'")
    try:
        start_text, end_text = span.replace(",", "").split("-")
        start, end = int(start_text), int(end_text)
    except ValueError as err:
        raise ValueError(
            f"Invalid region {region!r}: expected 'chrom:start-end'"
        ) from err
    return chrom, start, end


def bam_panel(bam_path: str | Path, ref_path: str | Path, region: str) -> Panel:
    """Build a Panel from a BAM file with reference FASTA and genomic region.

    Reads are sorted by start position and strand. Insertion columns are
    expanded so all reads align on a common grid.

    Args:
        bam_path: Path to the indexed BAM file.
        ref_path: Path to the reference FASTA (must be indexed).
        region: Genomic region string in "chrom:start-end" format (0-based start).

    Returns:
        A Panel with reference row, read rows, insertion columns, and tick labels.

    Raises:
        ValueError: If ``region`` is not of the form ``chrom:start-end``.
    """
    import pysam

    chrom, start, end = _parse_region(region)

    with pysam.FastaFile(ref_path) as fasta:
        ref_seq = fasta.fetch(chrom, start, end).upper()

    # The fetched sequence can be shorter than requested when the region runs
    # past the end of the contig; clamp the window so the reference row and
    # the column grid stay the same width instead of raising IndexError.
    end = min(end, start + len(ref_seq))

    with pysam.AlignmentFile(bam_path, "rb") as samfile:
        reads = [
            r
            for r in samfile.fetch(chrom, start, end)
            if not r.is_unmapped and r.cigartuples
        ]
    reads.sort(key=lambda r: (r.reference_start, r.is_reverse))

    # Find max insertion at each ref position
    max_ins: dict[int, int] = defaultdict(int)
    read_data = []
    for read in reads:
        aligned, inserts = build_read_row(read, start, end)
        read_data.append((read, aligned, inserts))
        for rpos, bases in inserts.items():
            max_ins[rpos] = max(max_ins[rpos], len(bases))

    # Build column map: each reference position gets one column, followed by
    # as many insertion columns as the longest insertion anchored there.
    col_map: dict[int, int] = {}
    ins_col_set: set[int] = set()
    col = 0
    for rpos in range(start, end):
        col_map[rpos] = col
        col += 1
        n_ins = max_ins.get(rpos, 0)
        ins_col_set.update(range(col, col + n_ins))
        col += n_ins
    total_cols = col

    # Build ref row with '-' in insertion columns
    ref_row: list[str] = []
    for rpos in range(start, end):
        ref_row.append(ref_seq[rpos - start])
        ref_row.extend("-" * max_ins.get(rpos, 0))

    # Build sequence rows
    seq_rows: list[tuple[str, list[str], bool]] = []
    for read, aligned, inserts in read_data:
        row = [" "] * total_cols
        for rpos in range(start, end):
            c = col_map[rpos]
            if rpos in aligned:
                row[c] = aligned[rpos]
            # Only reads that touch this position get their insertion columns
            # filled; others keep blanks so uncovered regions stay empty.
            if rpos in aligned or rpos in inserts:
                read_ins = inserts.get(rpos, [])
                for j in range(max_ins.get(rpos, 0)):
                    row[c + 1 + j] = read_ins[j] if j < len(read_ins) else "-"
        seq_rows.append((read.query_name, row, read.is_reverse))

    # Column labels: 1-based relative, ticks at 1, 10, 20...
    ref_width = end - start
    tick_1based = [1] + list(range(10, ref_width + 1, 10))
    col_labels = [
        (col_map[start + p - 1], str(p)) for p in tick_1based if (start + p - 1) < end
    ]

    label = Path(bam_path).stem
    return Panel(label, ref_row, seq_rows, total_cols, col_labels, ins_col_set)

src/tview/cli.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44

55
import click
66

7-
from tview.tview import bam_panel, fasta_panel, render_panels
7+
from tview.bam import bam_panel
8+
from tview.fasta import fasta_panel
9+
from tview.renderer import render_panels
810

911

1012
def _expand_stdin(paths: list[str]) -> list[str]:
@@ -65,7 +67,15 @@ def _expand_stdin(paths: list[str]) -> list[str]:
6567
@click.option(
6668
"--cell", type=float, default=0.14, show_default=True, help="Cell size in inches."
6769
)
68-
def main(bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell):
70+
@click.option(
71+
"--classic-mode",
72+
is_flag=True,
73+
default=False,
74+
help="Black-and-white rendering with no color highlighting.",
75+
)
76+
def main(
77+
bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell, classic_mode
78+
):
6979
"""Publication-quality alignment viewer (BAM or FASTA).
7080
7181
Supports BAM files (with reference FASTA), pre-aligned FASTA (e.g. MAFFT
@@ -95,5 +105,11 @@ def main(bam, ref, region, fasta, columns, output, palette, dpi, fontsize, cell)
95105
panels.append(fasta_panel(fasta_path, col_start, col_end))
96106

97107
render_panels(
98-
panels, output, fontsize=fontsize, dpi=dpi, palette=palette, cell=cell
108+
panels,
109+
output,
110+
fontsize=fontsize,
111+
dpi=dpi,
112+
palette=palette,
113+
cell=cell,
114+
classic=classic_mode,
99115
)

src/tview/config.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""Configuration loader for tview color palettes and style settings.
2+
3+
Reads palette.yaml and style.yaml from the package directory and exposes
4+
backward-compatible module-level constants.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from functools import lru_cache
10+
from pathlib import Path
11+
from typing import Any
12+
13+
import yaml
14+
15+
_PKG_DIR = Path(__file__).parent
16+
17+
18+
@lru_cache(maxsize=1)
def load_palette() -> dict[str, Any]:
    """Load color palette definitions from palette.yaml.

    The file is read from the package directory and decoded as UTF-8
    regardless of the platform's default locale encoding. The parsed result
    is cached, so every caller receives the same dict object.

    Returns:
        Parsed YAML dict with ``nucleotide``, ``amino_acid``, and ``rendering`` keys.
    """
    # Explicit encoding: the default for open() is locale-dependent.
    with open(_PKG_DIR / "palette.yaml", encoding="utf-8") as fh:
        return yaml.safe_load(fh)
27+
28+
29+
@lru_cache(maxsize=1)
def load_style() -> dict[str, Any]:
    """Load style settings from style.yaml.

    The file is read from the package directory and decoded as UTF-8
    regardless of the platform's default locale encoding. The parsed result
    is cached, so every caller receives the same dict object.

    Returns:
        Parsed YAML dict with ``alpha`` and ``font`` keys.
    """
    # Explicit encoding: the default for open() is locale-dependent.
    with open(_PKG_DIR / "style.yaml", encoding="utf-8") as fh:
        return yaml.safe_load(fh)
38+
39+
40+
# -- Backward-compatible constants -----------------------------------------
41+
_palette = load_palette()
42+
_style = load_style()
43+
44+
NT_COLORS: dict[str, str] = _palette["nucleotide"]
45+
AA_COLORS: dict[str, str] = _palette["amino_acid"]
46+
MISMATCH_BG: str = _palette["rendering"]["mismatch_bg"]
47+
INS_BG: str = _palette["rendering"]["insertion_bg"]
48+
GAP_COLOR: str = _palette["rendering"]["gap_color"]
49+
SEPARATOR_COLOR: str = _palette["rendering"]["separator_color"]
50+
TEXT_COLOR: str = _palette["rendering"]["text_color"]
51+
FALLBACK_BASE_COLOR: str = _palette["rendering"]["fallback_base_color"]
52+
PANEL_LABEL_COLOR: str = _palette["rendering"]["panel_label_color"]
53+
54+
FWD_ALPHA: float = _style["alpha"]["forward"]
55+
REV_ALPHA: float = _style["alpha"]["reverse"]
56+
FONT_PREFERENCES: list[dict[str, str]] = _style["font"]["preferences"]
57+
FONT_FALLBACK_FILENAME: str = _style["font"]["fallback_filename"]

0 commit comments

Comments
 (0)