Dotplot Visualization Tutorial¶
This notebook explores all visualisation options provided by the DotPlotter class.
A dot plot (or dotplot) is a classic bioinformatics visualisation that displays all shared subsequences between two sequences. Each dot represents a shared k-mer; diagonal runs of dots indicate conserved regions. Inversions appear as anti-diagonal lines.
import os
import tempfile
import matplotlib.pyplot as plt
from rusty_dot import SequenceIndex
from rusty_dot.dotplot import DotPlotter
1. Build a test index¶
We create three artificial sequences with different overlap patterns:
# Helper to create a reverse complement
def revcomp(seq: str) -> str:
    """Return the reverse complement of a DNA sequence.

    Upper- and lower-case ``A``/``C``/``G``/``T`` are complemented with their
    case preserved. Any other character (for example ``N``) has no entry in
    the translation table and is therefore copied through unchanged.

    Parameters
    ----------
    seq : str
        The nucleotide sequence to reverse-complement.

    Returns
    -------
    str
        The complemented sequence, read in the opposite direction.
    """
    table = str.maketrans('ACGTacgt', 'TGCAtgca')
    # Complement every base first, then reverse the whole string.
    return seq.translate(table)[::-1]
# The repeat unit must NOT be equal to its own reverse complement: with a
# reverse-palindromic unit such as 'ACGTACGTACGT', revcomp(unit * 5) is
# identical to unit * 5 and the "inverted" half of seq_c would not be
# inverted at all.
unit = 'ACGGTCATTGCA'  # 12 bp repeat unit (revcomp: 'TGCAATGACCGT')
seq_a = unit * 10  # 120 bp — the reference
seq_b = 'T' + unit * 9 + 'T'  # 110 bp — nine units, shifted by 1
seq_c = revcomp(unit * 5) + unit * 5  # 120 bp — first half inverted

# Index all three sequences with 8-mers.
idx = SequenceIndex(k=8)
idx.add_sequence('reference', seq_a)
idx.add_sequence('shifted', seq_b)
idx.add_sequence('partial_inv', seq_c)
print(f'Index: {idx}')
2. Inline rendering in Jupyter notebooks¶
Both plot() and plot_single() return a matplotlib.figure.Figure. In a
Jupyter notebook the returned figure is automatically displayed inline — no
file path is required.
Call matplotlib.pyplot.close(fig) when you are done with the figure to
free memory.
plotter = DotPlotter(idx)

# All-vs-all grid: with no output_path nothing is written to disk and the
# Figure object is simply handed back to the caller.
fig = plotter.plot(title='All vs All — inline display')
plt.close(fig)  # release the figure once it is no longer needed

# The same applies to a single query/target comparison.
fig = plotter.plot_single(
    title='reference vs partial_inv — inline',
    query_name='reference',
    target_name='partial_inv',
)
plt.close(fig)
3. All-vs-all dotplot (default settings)¶
DotPlotter.plot() without arguments produces an all-vs-all grid using all sequences
in the index. Passing output_path saves the figure to disk in addition to
returning it.
# Reserve a temporary .png path. delete=False keeps the file on disk after the
# handle is closed, so the plotter can write to the path afterwards.
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    all_vs_all_path = fh.name

# The figure is written to output_path and also returned.
fig = plotter.plot(
    output_path=all_vs_all_path,
    title='All vs All',
)
plt.close(fig)
print(f'Saved: {all_vs_all_path} ({os.path.getsize(all_vs_all_path)} bytes)')
4. Subset: specific query and target sets¶
Pass query_names and target_names to restrict the grid to a subset of sequences.
# Reserve a temporary .png path that outlives the (closed) file handle.
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    subset_path = fh.name

# Two query sequences against one target sequence.
plotter.plot(
    query_names=['reference', 'shifted'],
    target_names=['partial_inv'],
    output_path=subset_path,
    title='Reference & Shifted vs Partial Inversion',
)
print(f'Subset plot saved: {subset_path}')
5. Single-pair dotplot¶
plot_single renders one comparison panel with its own figure size and title.
# Reserve a temporary .png path that outlives the (closed) file handle.
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    single_path = fh.name

# One comparison panel with an explicit 5×5 inch figure size.
plotter.plot_single(
    query_name='reference',
    target_name='shifted',
    output_path=single_path,
    figsize=(5, 5),
    title='reference vs shifted',
)
print(f'Single-pair plot saved: {single_path}')
6. Customising dot appearance¶
All plotting methods accept dot_size and dot_color to control the appearance of match lines.
# Reserve a temporary .png path that outlives the (closed) file handle.
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    styled_path = fh.name

# Override the default dot size, dot colour and output resolution.
plotter.plot(
    output_path=styled_path,
    dot_size=1.5,
    dot_color='crimson',
    dpi=200,
    title='Custom style: crimson, dpi=200',
)
print(f'Styled plot saved: {styled_path}')
7. Controlling merge behaviour¶
When merge=True (default), consecutive co-linear k-mer hits are merged into single lines.
Set merge=False to display every individual k-mer hit as its own point — useful for
inspecting raw k-mer density.
# Reserve a temporary .png path that outlives the (closed) file handle.
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    unmerged_path = fh.name

# merge=False requests the individual k-mer hits instead of merged runs.
plotter.plot_single(
    query_name='reference',
    target_name='shifted',
    output_path=unmerged_path,
    merge=False,
    title='reference vs shifted (unmerged k-mer hits)',
)
print(f'Unmerged plot saved: {unmerged_path}')
8. Output resolution¶
Use the dpi parameter to control the resolution of the saved image.
Higher DPI is better for print-quality figures.
# Render the same comparison at three resolutions and report the file sizes.
for dpi in [72, 150, 300]:
    # One temporary file per resolution; the suffix records the DPI used.
    with tempfile.NamedTemporaryFile(suffix=f'_dpi{dpi}.png', delete=False) as fh:
        path = fh.name

    plotter.plot_single(
        'reference',
        'shifted',
        output_path=path,
        dpi=dpi,
        title=f'DPI = {dpi}',
    )
    size_kb = os.path.getsize(path) / 1024
    print(f'DPI={dpi:4d} file size={size_kb:.1f} kB path={path}')
9. Panel size control¶
For all-vs-all grids, figsize_per_panel controls the size (in inches) of each subplot panel.
# Reserve a temporary .png path that outlives the (closed) file handle.
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    large_path = fh.name

plotter.plot(
    output_path=large_path,
    figsize_per_panel=6.0,  # each panel is 6×6 inches
    title='Large panels (6 inches each)',
)
print(f'Large-panel plot saved: {large_path}')
10. Saving to different file formats¶
rusty-dot passes the format argument directly to Matplotlib's savefig, so
you can produce PNG, SVG, PDF, or any other matplotlib-supported format.
The simplest approach is to use the matching file extension — matplotlib
infers the format automatically. You can also pass format='svg' (or
'pdf', 'png', …) explicitly to override the extension.
| Extension / format= | Notes |
|---|---|
| .png / 'png' | Raster; good default for screen and web |
| .svg / 'svg' | Vector; infinitely scalable, ideal for publications |
| .pdf / 'pdf' | Vector; embeds cleanly into LaTeX and Word documents |
# PNG (default raster format)
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    png_path = fh.name
fig = plotter.plot(output_path=png_path, title='PNG output')
plt.close(fig)
print(f'PNG: {png_path} ({os.path.getsize(png_path)} bytes)')

# SVG via file extension
with tempfile.NamedTemporaryFile(suffix='.svg', delete=False) as fh:
    svg_ext_path = fh.name
fig = plotter.plot(output_path=svg_ext_path, title='SVG via extension')
plt.close(fig)
print(f'SVG (ext): {svg_ext_path} ({os.path.getsize(svg_ext_path)} bytes)')

# SVG via explicit format parameter (output path need not end in .svg)
with tempfile.NamedTemporaryFile(suffix='.out', delete=False) as fh:
    svg_fmt_path = fh.name
fig = plotter.plot(
    output_path=svg_fmt_path,
    format='svg',
    title='SVG via format param',
)
plt.close(fig)
print(f'SVG (fmt): {svg_fmt_path} ({os.path.getsize(svg_fmt_path)} bytes)')

# PDF — vector format suitable for LaTeX / Word
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as fh:
    pdf_path = fh.name
fig = plotter.plot(output_path=pdf_path, title='PDF output')
plt.close(fig)
print(f'PDF: {pdf_path} ({os.path.getsize(pdf_path)} bytes)')

# Verify SVG header. SVG is XML text, so read it with an explicit encoding
# rather than depending on the platform's locale default.
with open(svg_ext_path, encoding='utf-8') as f:
    snippet = f.read(80)
print(f'\nSVG file header: {snippet!r}')
11. Minimum alignment length filter¶
Pass min_length to suppress alignments shorter than a given number of base pairs.
This applies to merged k-mer runs (which may be longer than the original k-mer size after merging)
and to any pre-computed PAF alignments that are loaded later.
The filter is applied per match segment; only the length of the query span is checked:
query_end - query_start >= min_length.
# Without filtering: all merged hits are drawn
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    unfiltered_path = fh.name
plotter.plot_single(
    'reference',
    'shifted',
    output_path=unfiltered_path,
    title='No min_length filter',
)

# With filtering: only hits of at least 24 bp are drawn
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    filtered_path = fh.name
plotter.plot_single(
    'reference',
    'shifted',
    output_path=filtered_path,
    min_length=24,
    title='min_length=24',
)

# Report both file sizes side by side for comparison.
print(f'Unfiltered: {unfiltered_path} ({os.path.getsize(unfiltered_path)} bytes)')
print(f'Filtered: {filtered_path} ({os.path.getsize(filtered_path)} bytes)')
12. Colour alignments by identity (PAF alignments)¶
When alignments are loaded from a PAF file (e.g. produced by minimap2),
each record carries a residue_matches count and an alignment_block_len
that together define sequence identity. Pass color_by_identity=True to
plot() or plot_single() to render each alignment segment with a colour
drawn from the chosen Matplotlib colormap (identity_palette, default
'viridis').
Note: Individual k-mer matches are always 100 % identical (exact matches), so
color_by_identity only makes sense with PAF-sourced alignments. If you pass color_by_identity=True without supplying a PafAlignment, a warning is logged and the plot falls back to the default strand colours.
Use DotPlotter.plot_identity_colorbar() to generate a standalone colorbar
figure for the identity scale.
import random
from rusty_dot.paf_io import PafAlignment, PafRecord
# Build synthetic PAF records with varying identity values to illustrate
# the colour-by-identity feature without needing a real aligner.
random.seed(0)  # fixed seed so the block lengths are reproducible
paf_records = []
for i in range(8):
    block_len = random.randint(10, 30)
    identity = 0.6 + 0.04 * i  # 60 % … 88 %
    residue_matches = round(block_len * identity)
    # Blocks start every 12 bp and use the same coordinates on query and
    # target, so every record lies on the main diagonal.
    q_start = i * 12
    # NOTE(review): query_len / target_len are hard-coded to 144 — confirm
    # whether they should match the lengths of the indexed sequences.
    paf_records.append(
        PafRecord(
            query_name='reference',
            query_len=144,
            query_start=q_start,
            query_end=q_start + block_len,
            strand='+',
            target_name='shifted',
            target_len=144,
            target_start=q_start,
            target_end=q_start + block_len,
            residue_matches=residue_matches,
            alignment_block_len=block_len,
            mapping_quality=255,
        )
    )

paf_aln = PafAlignment(paf_records)
print(f'{len(paf_aln)} records loaded')
# Create a DotPlotter with the PAF alignment attached
identity_plotter = DotPlotter(idx, paf_alignment=paf_aln)

# ── Plot coloured by identity ─────────────────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    identity_path = fh.name
fig = identity_plotter.plot_single(
    'reference',
    'shifted',
    output_path=identity_path,
    color_by_identity=True,
    identity_palette='viridis',
    title='Coloured by identity (viridis)',
)
plt.close(fig)
print(f'Identity plot: {identity_path} ({os.path.getsize(identity_path)} bytes)')

# ── Colorbar (standalone scale figure) ───────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    colorbar_path = fh.name
fig_cb = identity_plotter.plot_identity_colorbar(
    palette='viridis',
    output_path=colorbar_path,
)
plt.close(fig_cb)
print(f'Colorbar: {colorbar_path} ({os.path.getsize(colorbar_path)} bytes)')

# ── Try a different palette ───────────────────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    plasma_path = fh.name
fig_p = identity_plotter.plot_single(
    'reference',
    'shifted',
    output_path=plasma_path,
    color_by_identity=True,
    identity_palette='plasma',
    title='Coloured by identity (plasma)',
)
plt.close(fig_p)
print(f'Plasma plot: {plasma_path} ({os.path.getsize(plasma_path)} bytes)')
Summary of DotPlotter parameters¶
| Parameter | Default | Description |
|---|---|---|
| query_names | None | List of query sequence names (rows); None = all |
| target_names | None | List of target sequence names (columns); None = all |
| output_path | None | Output file path; None = no file written (inline display only) |
| format | None | Output format (e.g. 'svg', 'png', 'pdf'); inferred from extension when None |
| figsize_per_panel | 4.0 | Inches per subplot panel (all-vs-all only) |
| figsize | (6, 6) | Total figure size for plot_single |
| dot_size | 0.5 | Line/marker size for each match |
| dot_color | "blue" | Colour of forward-strand match lines |
| rc_color | "red" | Colour of reverse-complement match lines |
| merge | True | Merge co-linear k-mer runs into blocks |
| min_length | 0 | Minimum alignment length to display; 0 = show all |
| title | None | Figure title |
| dpi | 150 | Output image resolution |
| color_by_identity | False | Colour alignments by identity fraction when a PafAlignment is loaded |
| identity_palette | 'viridis' | Matplotlib colormap for identity colouring (any valid colormap name) |
Both plot() and plot_single() return a matplotlib.figure.Figure.
In a Jupyter notebook the figure is displayed inline automatically.
Call matplotlib.pyplot.close(fig) to release memory when finished.