Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions docs/content/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1145,6 +1145,92 @@ Here is an example of nanopore reads spanning a foldback inversion (resulting fr
]
}

Large indel visualization
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The alignments track can visualize large insertions and deletions from CIGAR strings. Deletions are shown as semi-transparent red rectangles with triangular markers at the start and end points, while insertions are marked with boxes along with the number of bases in the insert. This is particularly useful for visualizing structural variants that are captured in the CIGAR strings of long-read data.

.. image:: images/figure_alignments.png

.. toggle::

.. code:: json

{
"general": {
"layout": "horizontal",
"reference": "hg19"
},
"output": {
"file": "figure_alignments.png",
"dpi": 400,
"width": 180
},
"regions": [
{
"chr": "7",
"start": 92400000,
"end": 92440000
}
],
"highlights": [],
"tracks": [
{
"type": "alignments",
"file": "/path/to/sample.bam",
"height": 35,
"margin_above": 1.5,
"bounding_box": false,
"fontscale": 1,
"label": "Long reads",
"label_rotate": true,
"read_color": "#cccccc",
"splitread_color": "#FF9F00",
"link_splitreads": true,
"cigar_deletion_threshold": 50,
"cigar_insertion_threshold": 50,
"deletion_color": "#FF4444",
"insertion_color": "#4444FF",
"hgap_bp": 30,
"vgap_frac": 0.3
},
{
"type": "genes",
"height": 10,
"margin_above": 1.5,
"bounding_box": false,
"fontscale": 1,
"label": "",
"label_rotate": false,
"style": "default",
"collapsed": true,
"only_protein_coding": true,
"exon_color": "#2980b9",
"genes": "auto"
},
{
"type": "chr_axis",
"height": 10,
"margin_above": 1.5,
"bounding_box": false,
"fontscale": 1,
"label": "",
"label_rotate": false,
"style": "default",
"unit": "kb",
"ticklabels_pos": "below",
"ticks_interval": "auto"
}
]
}

The key parameters for indel visualization are:

* ``cigar_deletion_threshold``: Minimum length of deletions to visualize (in base pairs)
* ``cigar_insertion_threshold``: Minimum length of insertions to visualize (in base pairs)
* ``deletion_color``: Color used for deletion overlays
* ``insertion_color``: Color used for insertion overlays

Allele-specific expression
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
174 changes: 160 additions & 14 deletions figeno/track_alignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.path as path
import matplotlib.patheffects as path_effects
from matplotlib.collections import PatchCollection
import numpy as np
import pandas as pd
Expand All @@ -20,7 +21,9 @@ def __init__(self,file,label="",label_rotate=False,read_color="#cccccc",splitrea
group_by="none",exchange_haplotypes=False,show_unphased=True,show_haplotype_colors=False,haplotype_colors=[],haplotype_labels=[],rephase=False,
color_by="none",color_unmodified="#1155dd",basemods=[["C","m","#f40202"]],fix_hardclip_basemod=False,rasterize=True,
link_splitreads=False, min_splitreads_breakpoints=2,only_show_splitreads=False, only_one_splitread_per_row=True, link_lw=0.2,hgap_bp=100, vgap_frac=0.3,
is_rna=False,fontscale=1,bounding_box=False,height=50,margin_above=1.5,**kwargs):
is_rna=False,fontscale=1,bounding_box=False,height=50,margin_above=1.5,
cigar_insertion_threshold=None, cigar_deletion_threshold=None,
insertion_color="#4444FF", deletion_color="#FF4444", **kwargs):
if file=="" or file is None:
raise KnownException("Please provide a bam file for the alignments track.")
if not os.path.isfile(file):
Expand Down Expand Up @@ -81,6 +84,10 @@ def __init__(self,file,label="",label_rotate=False,read_color="#cccccc",splitrea
self.bp_counts ={} # map breakpoint to its count
self.splitreads_coords={}
self.kwargs=kwargs
self.cigar_insertion_threshold = int(cigar_insertion_threshold) if cigar_insertion_threshold is not None else None
self.cigar_deletion_threshold = int(cigar_deletion_threshold) if cigar_deletion_threshold is not None else None
self.insertion_color = insertion_color
self.deletion_color = deletion_color

self.n_reads_basemod=0
self.n_reads_nobasemod=0
Expand Down Expand Up @@ -195,19 +202,126 @@ def convert_y(y):
read_start,read_end = max(read_start,read_end),min(read_start,read_end)
else:
read_start,read_end = min(read_start,read_end),max(read_start,read_end)
if ((read.flag&16)==0 and region.orientation=="+") or ((read.flag&16)!=0 and region.orientation=="-"):
vertices = [(read_start,y_converted) , (read_start,y_converted+height),(read_end,y_converted+height),
(read_end+arrow_width,y_converted+height/2),(read_end,y_converted)]
# Determine color first (for both the read and any split read properties)
color = self.readcolor(read, breakpoints=self.breakpoints)
if read.query_name in self.splitreads:
color = self.splitread_color

# Create read vertices
if ((read.flag&16)==0 and region.orientation=="+") or ((read.flag&16)!=0 and region.orientation=="-"):
vertices = [(read_start,y_converted),
(read_start,y_converted+height),
(read_end,y_converted+height),
(read_end+arrow_width,y_converted+height/2),
(read_end,y_converted)]
else:
vertices = [(read_start,y_converted),
(read_start-arrow_width,y_converted+height/2),
(read_start,y_converted+height),
(read_end,y_converted+height),
(read_end,y_converted)]

# Draw base read first
if "projection" in box and box["projection"]=="polar":
vertices = [(max(box["right"],min(box["left"],u)),v) for (u,v) in vertices]
vertices = interpolate_polar_vertices(vertices)
else:
vertices = [(read_start,y_converted) , (read_start-arrow_width,y_converted+height/2) ,(read_start,y_converted+height),
(read_end,y_converted+height),(read_end,y_converted)]
vertices = [(min(box["right"],max(box["left"],u)),v) for (u,v) in vertices]
polygon = patches.Polygon(vertices, color=color, lw=0, zorder=1)
box["ax"].add_patch(polygon)

# Add visualization for large indels if thresholds are set
if (self.cigar_insertion_threshold is not None or self.cigar_deletion_threshold is not None) and read.cigarstring:
indels = parse_cigar_for_large_indels(read.cigarstring,
self.cigar_insertion_threshold,
self.cigar_deletion_threshold)
for op, length, ref_offset in indels:
indel_pos = read.reference_start + ref_offset
indel_x = convert_x(indel_pos)
marker_height = height * 0.8 # Height of the indel marker

if op == 'I': # Insertion - box with length on the read
# Check if the insertion is within the visible region
if region.start <= indel_pos <= region.end:
# Create text with insertion length
text = f"+{length}"

# Calculate fixed width based on figure dimensions rather than read height
figure_width = box["right"] - box["left"]
standard_text_width = figure_width * 0.02 # Fixed percentage of figure width
text_width = len(text) * standard_text_width # Scale by text length
box_height = height # Keep height relative to read

# Create background box for insertion
rect = patches.Rectangle((indel_x - text_width/2, y_converted),
text_width, box_height,
facecolor=self.insertion_color,
alpha=0.3,
edgecolor=self.insertion_color,
linewidth=0.5,
zorder=2)
box["ax"].add_patch(rect)

# Add text in the box
box["ax"].text(indel_x, y_converted + height/2,
text,
horizontalalignment='center',
verticalalignment='center',
fontsize=4,
color='black', # Black text for better contrast
path_effects=[path_effects.withStroke(linewidth=0.5,
foreground=self.insertion_color,
alpha=0.3)], # Add subtle text outline
zorder=3)
elif op == 'D': # Deletion - colored rectangle for full length
# Check if any part of the deletion is within the visible region
deletion_end = indel_pos + length
if not (deletion_end < region.start or indel_pos > region.end):
# Calculate visible portion of deletion
visible_start = max(indel_pos, region.start)
visible_end = min(deletion_end, region.end)
del_start_x = convert_x(visible_start)
del_end_x = convert_x(visible_end)

# Create rectangle for visible portion of deletion
rect = patches.Rectangle((del_start_x, y_converted),
del_end_x - del_start_x,
height,
color=self.deletion_color,
alpha=0.3,
zorder=2)
box["ax"].add_patch(rect)

# Add markers at start and end of deletion
triangle_vertices_start = [
(del_start_x, y_converted - height * 0.1), # Tip
(del_start_x - height * 0.2, y_converted), # Base left
(del_start_x + height * 0.2, y_converted), # Base right
]
triangle_vertices_end = [
(del_end_x, y_converted - height * 0.1), # Tip
(del_end_x - height * 0.2, y_converted), # Base left
(del_end_x + height * 0.2, y_converted), # Base right
]

# Create and add the triangle patches
for vertices in [triangle_vertices_start, triangle_vertices_end]:
triangle = patches.Polygon(vertices,
color=self.deletion_color,
alpha=0.8,
zorder=2)
box["ax"].add_patch(triangle)
if "projection" in box and box["projection"]=="polar":
vertices = [(max(box["right"],min(box["left"],u)),v) for (u,v) in vertices]
vertices = interpolate_polar_vertices(vertices)
else:
vertices = [(min(box["right"],max(box["left"],u)),v) for (u,v) in vertices]
color=self.readcolor(read,breakpoints=self.breakpoints)

# Create and add the base read polygon first
polygon = patches.Polygon(vertices, color=color, lw=0, zorder=1)
box["ax"].add_patch(polygon)

# Splitreads
if read.query_name in self.splitreads:
color = self.splitread_color
Expand All @@ -227,10 +341,6 @@ def convert_y(y):
self.add_splitread_coords(read.query_name,qstart,read_start,y_converted+height/2,"left")
self.add_splitread_coords(read.query_name,qend,read_end+arrow_width,y_converted+height/2,"right")


polygon = patches.Polygon(vertices,color=color,lw=0,zorder=1)
box["ax"].add_patch(polygon)

if self.color_by=="basemod":
if (not "projection" in box) or box["projection"]!="polar": patches_methyl=[]
basemods = decode_read_basemods(read,self.basemods,self.samfile,fix_hardclip_basemod=self.fix_hardclip_basemod,warnings=warnings)
Expand Down Expand Up @@ -452,12 +562,14 @@ def assign_SNPs_haplotypes(self,regions):
print(SNPs)

def readcolor(self,read,breakpoints=[]):
# First check if it's one of our tracked split reads
if read.query_name in self.splitreads:
return self.splitread_color
# Then check breakpoints
for bp in breakpoints:
if read_overlaps_breakpoint(read,bp):
return bp.color
if read.has_tag("SA"):
print("AA")
return self.splitread_color
# Default to base read color
return self.read_color


Expand Down Expand Up @@ -542,4 +654,38 @@ def cigar2reference_span_length(cigar):
length+=int(cigar[current_start:current_pos])
current_start = current_pos+1
current_pos = current_start+1
return length
return length

def parse_cigar_for_large_indels(cigar, ins_threshold=None, del_threshold=None):
"""Parse CIGAR string and return list of large insertions and deletions.
Returns a list of tuples (op, length, ref_pos) where:
- op is 'I' for insertion or 'D' for deletion
- length is the length of the indel
- ref_pos is the reference position where the indel occurs"""
indels = []
ref_pos = 0 # Reference position counter
current_start = 0
current_pos = 1

while current_pos < len(cigar):
# Parse the length
while current_pos < len(cigar) and cigar[current_pos].isdigit():
current_pos += 1

length = int(cigar[current_start:current_pos])
op = cigar[current_pos]

# Check for insertions and deletions
if op == 'I' and ins_threshold is not None and length >= ins_threshold:
indels.append(('I', length, ref_pos))
elif op == 'D' and del_threshold is not None and length >= del_threshold:
indels.append(('D', length, ref_pos))

# Update reference position
if op not in ['S', 'H', 'I', 'P']:
ref_pos += length

current_start = current_pos + 1
current_pos = current_start + 1

return indels