Dotplot Visualization Tutorial¶

This notebook explores all visualisation options provided by the DotPlotter class.

A dot plot (or dotplot) is a classic bioinformatics visualisation that displays all shared subsequences between two sequences. Each dot represents a shared k-mer; diagonal runs of dots indicate conserved regions. Inversions appear as anti-diagonal lines.

In [ ]:

Copied!

import os
import tempfile

import matplotlib.pyplot as plt

from rusty_dot import SequenceIndex
from rusty_dot.dotplot import DotPlotter
import os
import tempfile

import matplotlib.pyplot as plt

from rusty_dot import SequenceIndex
from rusty_dot.dotplot import DotPlotter

1. Build a test index¶

We create three artificial sequences with different overlap patterns:

In [ ]:

Copied!





# Helper to create a reverse complement
def revcomp(seq):
    """Return the reverse complement of the DNA string *seq*.

    A, C, G and T are complemented with their case preserved. Any other
    character has no entry in the translation table and is therefore
    carried through unchanged (only its position is reversed).
    """
    complement = str.maketrans('ACGTacgt', 'TGCAtgca')
    complemented = seq.translate(complement)
    return ''.join(reversed(complemented))


# Three synthetic test sequences, all derived from one 12 bp repeat unit.
unit = 'ACGTACGTACGT'  # 12 bp repeat unit
seq_a = unit * 10  # 120 bp  — the reference
seq_b = 'T' + unit * 9 + 'T'  # 110 bp (1 + 108 + 1)  — shifted by 1
# NOTE(review): 'ACGT' is its own reverse complement, so revcomp(unit * 5)
# equals unit * 5 and seq_c ends up identical to seq_a — no inversion is
# actually present. Use a non-palindromic unit if an anti-diagonal is wanted.
seq_c = revcomp(unit * 5) + unit * 5  # 120 bp  — intended as half inverted

# Build the index with k=8 and register each sequence under the name that
# the plotting calls refer to.
idx = SequenceIndex(k=8)
idx.add_sequence('reference', seq_a)
idx.add_sequence('shifted', seq_b)
idx.add_sequence('partial_inv', seq_c)

print(f'Index: {idx}')
# Helper to create a reverse complement
def revcomp(seq):
    """Return the reverse complement of the DNA string *seq*.

    A, C, G and T are complemented with their case preserved. Any other
    character has no entry in the translation table and is therefore
    carried through unchanged (only its position is reversed).
    """
    complement = str.maketrans('ACGTacgt', 'TGCAtgca')
    complemented = seq.translate(complement)
    return ''.join(reversed(complemented))


# Three synthetic test sequences, all derived from one 12 bp repeat unit.
unit = 'ACGTACGTACGT'  # 12 bp repeat unit
seq_a = unit * 10  # 120 bp  — the reference
seq_b = 'T' + unit * 9 + 'T'  # 110 bp (1 + 108 + 1)  — shifted by 1
# NOTE(review): 'ACGT' is its own reverse complement, so revcomp(unit * 5)
# equals unit * 5 and seq_c ends up identical to seq_a — no inversion is
# actually present. Use a non-palindromic unit if an anti-diagonal is wanted.
seq_c = revcomp(unit * 5) + unit * 5  # 120 bp  — intended as half inverted

# Build the index with k=8 and register each sequence under the name that
# the plotting calls refer to.
idx = SequenceIndex(k=8)
idx.add_sequence('reference', seq_a)
idx.add_sequence('shifted', seq_b)
idx.add_sequence('partial_inv', seq_c)

print(f'Index: {idx}')

2. Inline rendering in Jupyter notebooks¶

Both plot() and plot_single() return a matplotlib.figure.Figure. In a Jupyter notebook the returned figure is automatically displayed inline — no file path is required.

Call matplotlib.pyplot.close(fig) when you are done with the figure to free memory.

In [ ]:

Copied!

plotter = DotPlotter(idx)

# No output_path: the figure is returned and displayed inline in Jupyter
fig = plotter.plot(title='All vs All — inline display')
plt.close(fig)  # free memory when no longer needed
plotter = DotPlotter(idx)

# No output_path: the figure is returned and displayed inline in Jupyter
fig = plotter.plot(title='All vs All — inline display')
plt.close(fig)  # free memory when no longer needed

In [ ]:

Copied!





# Inline display for a single pair
fig = plotter.plot_single(
    query_name='reference',
    target_name='partial_inv',
    title='reference vs partial_inv — inline',
)
plt.close(fig)
# Inline display for a single pair
fig = plotter.plot_single(
    query_name='reference',
    target_name='partial_inv',
    title='reference vs partial_inv — inline',
)
plt.close(fig)

3. All-vs-all dotplot (default settings)¶

DotPlotter.plot() without arguments produces an all-vs-all grid using all sequences in the index. Passing output_path saves the figure to disk in addition to returning it.

In [ ]:

Copied!





with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    all_vs_all_path = fh.name

fig = plotter.plot(
    output_path=all_vs_all_path,
    title='All vs All',
)
plt.close(fig)
print(f'Saved: {all_vs_all_path}  ({os.path.getsize(all_vs_all_path)} bytes)')
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    all_vs_all_path = fh.name

fig = plotter.plot(
    output_path=all_vs_all_path,
    title='All vs All',
)
plt.close(fig)
print(f'Saved: {all_vs_all_path}  ({os.path.getsize(all_vs_all_path)} bytes)')

4. Subset: specific query and target sets¶

Pass query_names and target_names to restrict the grid to a subset of sequences.

In [ ]:

Copied!





with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    subset_path = fh.name

plotter.plot(
    query_names=['reference', 'shifted'],
    target_names=['partial_inv'],
    output_path=subset_path,
    title='Reference & Shifted vs Partial Inversion',
)
print(f'Subset plot saved: {subset_path}')
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    subset_path = fh.name

plotter.plot(
    query_names=['reference', 'shifted'],
    target_names=['partial_inv'],
    output_path=subset_path,
    title='Reference & Shifted vs Partial Inversion',
)
print(f'Subset plot saved: {subset_path}')

5. Single-pair dotplot¶

plot_single renders one comparison panel with its own figure size and title.

In [ ]:

Copied!





with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    single_path = fh.name

plotter.plot_single(
    query_name='reference',
    target_name='shifted',
    output_path=single_path,
    figsize=(5, 5),
    title='reference vs shifted',
)
print(f'Single-pair plot saved: {single_path}')
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    single_path = fh.name

plotter.plot_single(
    query_name='reference',
    target_name='shifted',
    output_path=single_path,
    figsize=(5, 5),
    title='reference vs shifted',
)
print(f'Single-pair plot saved: {single_path}')

6. Customising dot appearance¶

All plotting methods accept dot_size and dot_color to control the appearance of match lines.

In [ ]:

Copied!





with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    styled_path = fh.name

plotter.plot(
    output_path=styled_path,
    dot_size=1.5,
    dot_color='crimson',
    dpi=200,
    title='Custom style: crimson, dpi=200',
)
print(f'Styled plot saved: {styled_path}')
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    styled_path = fh.name

plotter.plot(
    output_path=styled_path,
    dot_size=1.5,
    dot_color='crimson',
    dpi=200,
    title='Custom style: crimson, dpi=200',
)
print(f'Styled plot saved: {styled_path}')

7. Controlling merge behaviour¶

When merge=True (default), consecutive co-linear k-mer hits are merged into single lines. Set merge=False to display every individual k-mer hit as its own point — useful for inspecting raw k-mer density.

In [ ]:

Copied!





with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    unmerged_path = fh.name

plotter.plot_single(
    query_name='reference',
    target_name='shifted',
    output_path=unmerged_path,
    merge=False,
    title='reference vs shifted (unmerged k-mer hits)',
)
print(f'Unmerged plot saved: {unmerged_path}')
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    unmerged_path = fh.name

plotter.plot_single(
    query_name='reference',
    target_name='shifted',
    output_path=unmerged_path,
    merge=False,
    title='reference vs shifted (unmerged k-mer hits)',
)
print(f'Unmerged plot saved: {unmerged_path}')

8. Output resolution¶

Use the dpi parameter to control the resolution of the saved image. Higher DPI is better for print-quality figures.

In [ ]:

Copied!





for dpi in [72, 150, 300]:
    with tempfile.NamedTemporaryFile(suffix=f'_dpi{dpi}.png', delete=False) as fh:
        path = fh.name
    plotter.plot_single(
        'reference',
        'shifted',
        output_path=path,
        dpi=dpi,
        title=f'DPI = {dpi}',
    )
    size_kb = os.path.getsize(path) / 1024
    print(f'DPI={dpi:4d}  file size={size_kb:.1f} kB  path={path}')
for dpi in [72, 150, 300]:
    with tempfile.NamedTemporaryFile(suffix=f'_dpi{dpi}.png', delete=False) as fh:
        path = fh.name
    plotter.plot_single(
        'reference',
        'shifted',
        output_path=path,
        dpi=dpi,
        title=f'DPI = {dpi}',
    )
    size_kb = os.path.getsize(path) / 1024
    print(f'DPI={dpi:4d}  file size={size_kb:.1f} kB  path={path}')

9. Panel size control¶

For all-vs-all grids, figsize_per_panel controls the size (in inches) of each subplot panel.

In [ ]:

Copied!





with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    large_path = fh.name

plotter.plot(
    output_path=large_path,
    figsize_per_panel=6.0,  # each panel is 6×6 inches
    title='Large panels (6 inches each)',
)
print(f'Large-panel plot saved: {large_path}')
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    large_path = fh.name

plotter.plot(
    output_path=large_path,
    figsize_per_panel=6.0,  # each panel is 6×6 inches
    title='Large panels (6 inches each)',
)
print(f'Large-panel plot saved: {large_path}')

10. Saving to different file formats¶

rusty-dot passes the format argument directly to Matplotlib's savefig(), so you can produce PNG, SVG, PDF, or any other Matplotlib-supported format.

The simplest approach is to use the matching file extension — matplotlib infers the format automatically. You can also pass format='svg' (or 'pdf', 'png', …) explicitly to override the extension.

| Extension / `format=` | Notes |
| --- | --- |
| `.png` / `'png'` | Raster; good default for screen and web |
| `.svg` / `'svg'` | Vector; infinitely scalable, ideal for publications |
| `.pdf` / `'pdf'` | Vector; embeds cleanly into LaTeX and Word documents |

In [ ]:

Copied!





# PNG (default raster format)
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    png_path = fh.name

fig = plotter.plot(output_path=png_path, title='PNG output')
plt.close(fig)
print(f'PNG:  {png_path}  ({os.path.getsize(png_path)} bytes)')

# SVG via file extension
with tempfile.NamedTemporaryFile(suffix='.svg', delete=False) as fh:
    svg_ext_path = fh.name

fig = plotter.plot(output_path=svg_ext_path, title='SVG via extension')
plt.close(fig)
print(f'SVG (ext):     {svg_ext_path}  ({os.path.getsize(svg_ext_path)} bytes)')

# SVG via explicit format parameter (output path need not end in .svg)
with tempfile.NamedTemporaryFile(suffix='.out', delete=False) as fh:
    svg_fmt_path = fh.name

fig = plotter.plot(output_path=svg_fmt_path, format='svg', title='SVG via format param')
plt.close(fig)
print(f'SVG (fmt):     {svg_fmt_path}  ({os.path.getsize(svg_fmt_path)} bytes)')

# PDF — vector format suitable for LaTeX / Word
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as fh:
    pdf_path = fh.name

fig = plotter.plot(output_path=pdf_path, title='PDF output')
plt.close(fig)
print(f'PDF:  {pdf_path}  ({os.path.getsize(pdf_path)} bytes)')

# Verify the SVG header. Matplotlib's SVG backend writes UTF-8, so decode
# explicitly rather than relying on the platform's locale encoding (which
# is not UTF-8 on every system).
with open(svg_ext_path, encoding='utf-8') as f:
    snippet = f.read(80)  # the first 80 characters are enough to show the XML/SVG preamble
print(f'\nSVG file header: {snippet!r}')
# PNG (default raster format)
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    png_path = fh.name

fig = plotter.plot(output_path=png_path, title='PNG output')
plt.close(fig)
print(f'PNG:  {png_path}  ({os.path.getsize(png_path)} bytes)')

# SVG via file extension
with tempfile.NamedTemporaryFile(suffix='.svg', delete=False) as fh:
    svg_ext_path = fh.name

fig = plotter.plot(output_path=svg_ext_path, title='SVG via extension')
plt.close(fig)
print(f'SVG (ext):     {svg_ext_path}  ({os.path.getsize(svg_ext_path)} bytes)')

# SVG via explicit format parameter (output path need not end in .svg)
with tempfile.NamedTemporaryFile(suffix='.out', delete=False) as fh:
    svg_fmt_path = fh.name

fig = plotter.plot(output_path=svg_fmt_path, format='svg', title='SVG via format param')
plt.close(fig)
print(f'SVG (fmt):     {svg_fmt_path}  ({os.path.getsize(svg_fmt_path)} bytes)')

# PDF — vector format suitable for LaTeX / Word
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as fh:
    pdf_path = fh.name

fig = plotter.plot(output_path=pdf_path, title='PDF output')
plt.close(fig)
print(f'PDF:  {pdf_path}  ({os.path.getsize(pdf_path)} bytes)')

# Verify the SVG header. Matplotlib's SVG backend writes UTF-8, so decode
# explicitly rather than relying on the platform's locale encoding (which
# is not UTF-8 on every system).
with open(svg_ext_path, encoding='utf-8') as f:
    snippet = f.read(80)  # the first 80 characters are enough to show the XML/SVG preamble
print(f'\nSVG file header: {snippet!r}')

11. Minimum alignment length filter¶

Pass min_length to suppress alignments shorter than a given number of base pairs. This applies to merged k-mer runs (which may be longer than the original k-mer size after merging) and to any pre-computed PAF alignments that are loaded later.

The filter is applied per match segment and checks only the length of the query span: a segment is drawn when query_end - query_start >= min_length.

In [ ]:

Copied!





# Without filtering: all merged hits are drawn
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    unfiltered_path = fh.name
plotter.plot_single(
    'reference', 'shifted', output_path=unfiltered_path, title='No min_length filter'
)

# With filtering: only hits of at least 24 bp are drawn
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    filtered_path = fh.name
plotter.plot_single(
    'reference',
    'shifted',
    output_path=filtered_path,
    min_length=24,
    title='min_length=24',
)

print(f'Unfiltered: {unfiltered_path}  ({os.path.getsize(unfiltered_path)} bytes)')
print(f'Filtered:   {filtered_path}  ({os.path.getsize(filtered_path)} bytes)')
# Without filtering: all merged hits are drawn
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    unfiltered_path = fh.name
plotter.plot_single(
    'reference', 'shifted', output_path=unfiltered_path, title='No min_length filter'
)

# With filtering: only hits of at least 24 bp are drawn
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    filtered_path = fh.name
plotter.plot_single(
    'reference',
    'shifted',
    output_path=filtered_path,
    min_length=24,
    title='min_length=24',
)

print(f'Unfiltered: {unfiltered_path}  ({os.path.getsize(unfiltered_path)} bytes)')
print(f'Filtered:   {filtered_path}  ({os.path.getsize(filtered_path)} bytes)')

12. Colour alignments by identity (PAF alignments)¶

When alignments are loaded from a PAF file (e.g. produced by minimap2), each record carries a residue_matches count and an alignment_block_len that together define sequence identity. Pass color_by_identity=True to plot() or plot_single() to render each alignment segment with a colour drawn from the chosen Matplotlib colormap (identity_palette, default 'viridis').

Note: Individual k-mer matches are always 100 % identical (exact matches), so color_by_identity only makes sense with PAF-sourced alignments. If you pass color_by_identity=True without supplying a PafAlignment a warning is logged and the plot falls back to the default strand colours.

Use DotPlotter.plot_identity_colorbar() to generate a standalone colorbar figure for the identity scale.

In [ ]:

Copied!





import random

from rusty_dot.paf_io import PafAlignment, PafRecord

# Build synthetic PAF records with varying identity values to illustrate
# the colour-by-identity feature without needing a real aligner.
# NOTE(review): query_len/target_len are hard-coded to 144, but 'reference'
# was built as unit * 10 (120 bp) and 'shifted' as 'T' + unit * 9 + 'T'
# (110 bp); target_end can also reach 84 + 30 = 114, past the end of
# 'shifted'. Confirm whether the plotter relies on these fields.
random.seed(0)  # fixed seed so the random block lengths are reproducible
paf_records = []
for i in range(8):
    block_len = random.randint(10, 30)  # randint bounds are inclusive
    identity = 0.6 + 0.04 * i  # 60 % … 88 %
    residue_matches = round(block_len * identity)  # match count implied by identity
    q_start = i * 12  # one record every 12 bp along the query
    # Query and target use the same start/end coordinates on the '+' strand.
    paf_records.append(
        PafRecord(
            query_name='reference',
            query_len=144,
            query_start=q_start,
            query_end=q_start + block_len,
            strand='+',
            target_name='shifted',
            target_len=144,
            target_start=q_start,
            target_end=q_start + block_len,
            residue_matches=residue_matches,
            alignment_block_len=block_len,
            mapping_quality=255,
        )
    )

paf_aln = PafAlignment(paf_records)
print(f'{len(paf_aln)} records loaded')

# Create a DotPlotter with the PAF alignment attached
identity_plotter = DotPlotter(idx, paf_alignment=paf_aln)

# ── Plot coloured by identity ─────────────────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    identity_path = fh.name

fig = identity_plotter.plot_single(
    'reference',
    'shifted',
    output_path=identity_path,
    color_by_identity=True,
    identity_palette='viridis',
    title='Coloured by identity (viridis)',
)
plt.close(fig)
print(f'Identity plot: {identity_path}  ({os.path.getsize(identity_path)} bytes)')

# ── Colorbar (standalone scale figure) ───────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    colorbar_path = fh.name

fig_cb = identity_plotter.plot_identity_colorbar(
    palette='viridis',
    output_path=colorbar_path,
)
plt.close(fig_cb)
print(f'Colorbar:      {colorbar_path}  ({os.path.getsize(colorbar_path)} bytes)')

# ── Try a different palette ───────────────────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    plasma_path = fh.name

fig_p = identity_plotter.plot_single(
    'reference',
    'shifted',
    output_path=plasma_path,
    color_by_identity=True,
    identity_palette='plasma',
    title='Coloured by identity (plasma)',
)
plt.close(fig_p)
print(f'Plasma plot:   {plasma_path}  ({os.path.getsize(plasma_path)} bytes)')
import random

from rusty_dot.paf_io import PafAlignment, PafRecord

# Build synthetic PAF records with varying identity values to illustrate
# the colour-by-identity feature without needing a real aligner.
# NOTE(review): query_len/target_len are hard-coded to 144, but 'reference'
# was built as unit * 10 (120 bp) and 'shifted' as 'T' + unit * 9 + 'T'
# (110 bp); target_end can also reach 84 + 30 = 114, past the end of
# 'shifted'. Confirm whether the plotter relies on these fields.
random.seed(0)  # fixed seed so the random block lengths are reproducible
paf_records = []
for i in range(8):
    block_len = random.randint(10, 30)  # randint bounds are inclusive
    identity = 0.6 + 0.04 * i  # 60 % … 88 %
    residue_matches = round(block_len * identity)  # match count implied by identity
    q_start = i * 12  # one record every 12 bp along the query
    # Query and target use the same start/end coordinates on the '+' strand.
    paf_records.append(
        PafRecord(
            query_name='reference',
            query_len=144,
            query_start=q_start,
            query_end=q_start + block_len,
            strand='+',
            target_name='shifted',
            target_len=144,
            target_start=q_start,
            target_end=q_start + block_len,
            residue_matches=residue_matches,
            alignment_block_len=block_len,
            mapping_quality=255,
        )
    )

paf_aln = PafAlignment(paf_records)
print(f'{len(paf_aln)} records loaded')

# Create a DotPlotter with the PAF alignment attached
identity_plotter = DotPlotter(idx, paf_alignment=paf_aln)

# ── Plot coloured by identity ─────────────────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    identity_path = fh.name

fig = identity_plotter.plot_single(
    'reference',
    'shifted',
    output_path=identity_path,
    color_by_identity=True,
    identity_palette='viridis',
    title='Coloured by identity (viridis)',
)
plt.close(fig)
print(f'Identity plot: {identity_path}  ({os.path.getsize(identity_path)} bytes)')

# ── Colorbar (standalone scale figure) ───────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    colorbar_path = fh.name

fig_cb = identity_plotter.plot_identity_colorbar(
    palette='viridis',
    output_path=colorbar_path,
)
plt.close(fig_cb)
print(f'Colorbar:      {colorbar_path}  ({os.path.getsize(colorbar_path)} bytes)')

# ── Try a different palette ───────────────────────────────────────────────
with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as fh:
    plasma_path = fh.name

fig_p = identity_plotter.plot_single(
    'reference',
    'shifted',
    output_path=plasma_path,
    color_by_identity=True,
    identity_palette='plasma',
    title='Coloured by identity (plasma)',
)
plt.close(fig_p)
print(f'Plasma plot:   {plasma_path}  ({os.path.getsize(plasma_path)} bytes)')

Summary of DotPlotter parameters¶

| Parameter | Default | Description |
| --- | --- | --- |
| `query_names` | `None` | List of query sequence names (rows); `None` = all |
| `target_names` | `None` | List of target sequence names (columns); `None` = all |
| `output_path` | `None` | Output file path; `None` = no file written (inline display only) |
| `format` | `None` | Output format (e.g. `'svg'`, `'png'`, `'pdf'`); inferred from extension when `None` |
| `figsize_per_panel` | `4.0` | Inches per subplot panel (all-vs-all only) |
| `figsize` | `(6, 6)` | Total figure size for `plot_single` |
| `dot_size` | `0.5` | Line/marker size for each match |
| `dot_color` | `"blue"` | Colour of forward-strand match lines |
| `rc_color` | `"red"` | Colour of reverse-complement match lines |
| `merge` | `True` | Merge co-linear k-mer runs into blocks |
| `min_length` | `0` | Minimum alignment length to display; `0` = show all |
| `title` | `None` | Figure title |
| `dpi` | `150` | Output image resolution |
| `color_by_identity` | `False` | Colour alignments by identity fraction when a `PafAlignment` is loaded |
| `identity_palette` | `'viridis'` | Matplotlib colormap for identity colouring (any valid colormap name) |

Both plot() and plot_single() return a matplotlib.figure.Figure. In a Jupyter notebook the figure is displayed inline automatically. Call matplotlib.pyplot.close(fig) to release memory when finished.