Python API Guide
Use PyPopART programmatically in your Python scripts and notebooks.
Basic Workflow
from pypopart import Alignment, Network
from pypopart.algorithms import MSTAlgorithm, MJNAlgorithm
from pypopart.visualization import StaticPlot, InteractivePlot
# 1. Load sequence data
alignment = Alignment.from_fasta("sequences.fasta")
# 2. Build network
algorithm = MSTAlgorithm()
network = algorithm.build_network(alignment)
# 3. Visualize
plot = StaticPlot(network)
plot.save("network.png")
Loading Data
From Files
from pypopart import Alignment
# FASTA format
alignment = Alignment.from_fasta("sequences.fasta")
# NEXUS format (with metadata)
alignment = Alignment.from_nexus("sequences.nex")
# PHYLIP format
alignment = Alignment.from_phylip("sequences.phy")
# GenBank format
alignment = Alignment.from_genbank("sequences.gb")
From Strings
from pypopart import Alignment
fasta_string = """
>Seq1
ATCGATCGATCG
>Seq2
ATCGATCGATCG
>Seq3
ATCGATTGATCG
"""
alignment = Alignment.from_string(fasta_string, format="fasta")
From BioPython
from Bio import AlignIO
from pypopart import Alignment
# Load with BioPython
bio_alignment = AlignIO.read("sequences.fasta", "fasta")
# Convert to PyPopART
alignment = Alignment.from_biopython(bio_alignment)
Building Networks
Algorithm Selection
from pypopart.algorithms import (
    MSTAlgorithm,
    MSNAlgorithm,
    TCSAlgorithm,
    MJNAlgorithm,
    ParsimonyNetAlgorithm,
    TSWAlgorithm,
)
# Minimum Spanning Tree
mst = MSTAlgorithm()
network = mst.build_network(alignment)
# Median-Joining Network
mjn = MJNAlgorithm()
network = mjn.build_network(alignment)
# TCS with custom epsilon
tcs = TCSAlgorithm(epsilon=0.99)
network = tcs.build_network(alignment)
Distance Metrics
from pypopart.core.distance import DistanceCalculator
# Create calculator with specific metric
calc = DistanceCalculator(metric="k2p") # Kimura 2-parameter
distances = calc.calculate(alignment)
# Available metrics: 'hamming', 'jukes-cantor', 'k2p', 'tamura-nei'
# Use custom distance matrix
algorithm = MSTAlgorithm(distance_matrix=distances)
network = algorithm.build_network(alignment)
Working with Networks
Network Properties
# Basic properties
print(f"Number of nodes: {network.number_of_nodes()}")
print(f"Number of edges: {network.number_of_edges()}")
print(f"Network density: {network.density()}")
# Get nodes and edges
nodes = list(network.nodes())
edges = list(network.edges())
# Node attributes
for node in network.nodes():
    haplotype = network.nodes[node]['haplotype']
    frequency = network.nodes[node]['frequency']
    print(f"{node}: frequency={frequency}")
# Edge attributes
for u, v in network.edges():
    weight = network[u][v]['weight']
    print(f"{u} -> {v}: distance={weight}")
Network Statistics
from pypopart.stats import NetworkStatistics, TopologyAnalysis
# Calculate basic statistics
stats = NetworkStatistics(network)
print(f"Diameter: {stats.diameter()}")
print(f"Average path length: {stats.average_path_length()}")
print(f"Clustering coefficient: {stats.clustering_coefficient()}")
# Topology analysis
topology = TopologyAnalysis(network)
hubs = topology.identify_hubs()
bridges = topology.identify_bridges()
star_patterns = topology.detect_star_patterns()
Population Genetics
from pypopart.stats import PopulationGenetics
popgen = PopulationGenetics(alignment)
# Diversity measures
print(f"Nucleotide diversity: {popgen.nucleotide_diversity()}")
print(f"Haplotype diversity: {popgen.haplotype_diversity()}")
# Neutrality tests
print(f"Tajima's D: {popgen.tajimas_d()}")
print(f"Fu's Fs: {popgen.fus_fs()}")
# Population differentiation (requires population metadata)
fst = popgen.calculate_fst(population_column='Population')
print(f"FST: {fst}")
Visualization
Static Plots
from pypopart.visualization import StaticPlot
# Basic plot
plot = StaticPlot(network)
plot.save("network.png")
# Customized plot
plot = StaticPlot(
    network,
    layout="spring",  # or 'circular', 'kamada-kawai'
    node_size=500,
    edge_width=2.0,
    figsize=(12, 10),
    dpi=300
)
# Color by metadata
plot.color_by_attribute("Population")
plot.save("colored_network.pdf", format="pdf")
Interactive Plots
from pypopart.visualization import InteractivePlot
# Create interactive HTML plot
plot = InteractivePlot(network)
plot.save("network.html")
# With custom styling
plot = InteractivePlot(
    network,
    layout="spring",
    node_size_by="frequency",
    color_by="Population",
    show_labels=True
)
plot.save("interactive_network.html")
Layout Algorithms
from pypopart.layout import LayoutAlgorithm
# Use specific layout
layout = LayoutAlgorithm.spring(network, k=1.0, iterations=50)
plot = StaticPlot(network, positions=layout)
# Available layouts
layouts = [
    "spring",  # Force-directed (default)
    "circular",  # Circular arrangement
    "kamada-kawai",  # Energy minimization
    "spectral",  # Eigenvalue-based
    "random"  # Random positions
]
Exporting Results
Save Networks
# Export in various formats
network.save("network.gml") # GML format
network.save("network.graphml") # GraphML format
network.save("network.json") # JSON format
network.save("network.nexus") # NEXUS format
# With metadata
network.save("network.nexus", include_traits=True)
Export Distance Matrix
from pypopart.core.distance import DistanceCalculator
calc = DistanceCalculator(metric="k2p")
distances = calc.calculate(alignment)
# Save as CSV
distances.to_csv("distances.csv")
# Save as NumPy array
import numpy as np
np.save("distances.npy", distances.matrix)
Export Statistics
from pypopart.stats import NetworkStatistics
import pandas as pd
stats = NetworkStatistics(network)
results = {
    "diameter": stats.diameter(),
    "avg_path_length": stats.average_path_length(),
    "clustering": stats.clustering_coefficient(),
    "num_nodes": network.number_of_nodes(),
    "num_edges": network.number_of_edges(),
}
# Save as DataFrame
df = pd.DataFrame([results])
df.to_csv("statistics.csv", index=False)
Advanced Usage
Custom Algorithms
from pypopart.algorithms.base import BaseAlgorithm
class CustomAlgorithm(BaseAlgorithm):
    def build_network(self, alignment):
        # Your implementation
        network = self.create_empty_network()
        # ... add nodes and edges
        return network
# Use your algorithm
algorithm = CustomAlgorithm()
network = algorithm.build_network(alignment)
Batch Processing
from pathlib import Path
from pypopart import Alignment
from pypopart.algorithms import MSTAlgorithm
# Process multiple files
algorithm = MSTAlgorithm()
for fasta_file in Path("data").glob("*.fasta"):
    alignment = Alignment.from_fasta(fasta_file)
    network = algorithm.build_network(alignment)
    network.save(f"networks/{fasta_file.stem}.gml")
Integration with NetworkX
import networkx as nx
# PyPopART networks are NetworkX graphs
# Use any NetworkX function
# Graph algorithms
shortest_paths = nx.shortest_path(network)
betweenness = nx.betweenness_centrality(network)
communities = nx.community.louvain_communities(network)
# Export to other NetworkX formats
nx.write_gexf(network, "network.gexf")
Error Handling
from pypopart.exceptions import (
    AlignmentError,
    NetworkError,
    DistanceError,
)
try:
    alignment = Alignment.from_fasta("sequences.fasta")
    network = algorithm.build_network(alignment)
except AlignmentError as e:
    print(f"Alignment error: {e}")
except NetworkError as e:
    print(f"Network construction error: {e}")
except FileNotFoundError:
    print("Input file not found")
Next Steps
- Loading Data Guide: Detailed data loading instructions
- Algorithm Guide: Choose the right algorithm
- Visualization Guide: Create publication-quality figures
- API Reference: Complete API documentation