Skip to content

Network Module

Haplotype network representation and manipulation.

Overview

The graph module provides network data structures built on NetworkX.

Classes

pypopart.core.graph

Haplotype network representation and analysis for PyPopART.

Provides the HaplotypeNetwork class for building and analyzing haplotype networks from DNA sequence data.

NetworkStats dataclass

Statistics about a haplotype network.

Source code in src/pypopart/core/graph.py
@dataclass
class NetworkStats:
    """
    Statistics about a haplotype network.

    Instances are produced by ``HaplotypeNetwork.calculate_stats``.

    Attributes
    ----------
    num_nodes :
        Number of nodes in the graph (haplotypes plus median vectors).
    num_edges :
        Number of edges in the graph.
    num_haplotypes :
        Number of haplotypes that are not flagged as median vectors.
    num_median_vectors :
        Number of nodes flagged as inferred median vectors.
    total_samples :
        Sum of the ``frequency`` of every haplotype stored in the network.
    diameter :
        Longest shortest path in the graph, or ``-1`` when the diameter
        could not be computed (e.g. the network is not connected).
    avg_degree :
        ``2 * num_edges / num_nodes``, or ``0.0`` for a network without nodes.
    num_components :
        Number of connected components.
    """

    num_nodes: int
    num_edges: int
    num_haplotypes: int
    num_median_vectors: int
    total_samples: int
    diameter: int
    avg_degree: float
    num_components: int

HaplotypeNetwork

Represents a haplotype network using NetworkX.

A haplotype network is a graph where nodes represent unique haplotypes (or inferred median vectors) and edges represent mutational relationships. Node sizes typically reflect haplotype frequencies, and edge weights represent genetic distances.

Source code in src/pypopart/core/graph.py
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
class HaplotypeNetwork:
    """
    Represents a haplotype network using NetworkX.

    A haplotype network is a graph where nodes represent unique haplotypes
    (or inferred median vectors) and edges represent mutational relationships.
    Node sizes typically reflect haplotype frequencies, and edge weights
    represent genetic distances.

    Attributes
    ----------
    name :
        Display name of the network.
    metadata :
        Free-form dictionary carried along with the network and emitted by
        ``to_dict``.
    """

    def __init__(self, name: Optional[str] = None):
        """
        Initialize an empty haplotype network.

        Parameters
        ----------
        name :
            Optional name for the network. ``'HaplotypeNetwork'`` is used
            when the name is omitted or empty.
        """
        self.name = name or 'HaplotypeNetwork'
        self._graph = nx.Graph()
        # Node ID -> Haplotype, for observed haplotypes and median vectors alike.
        self._haplotype_map: Dict[str, Haplotype] = {}
        # IDs of the nodes that were added with ``median_vector=True``.
        self._median_vectors: Set[str] = set()
        self.metadata: Dict[str, Any] = {}

    @classmethod
    def from_serialized(cls, network_data: Dict) -> 'HaplotypeNetwork':
        """
        Reconstruct a HaplotypeNetwork from serialized data.

        Accepts the output of ``to_dict`` as well as payloads that use the
        alternative keys ``'is_median'`` (median flag) and ``'samples'``
        (sample IDs).

        Parameters
        ----------
        network_data :
            Dictionary containing serialized network data with 'nodes' and
            'edges', and optionally 'name' and 'metadata'.

        Returns
        -------
            Reconstructed HaplotypeNetwork object.

        Raises
        ------
        KeyError
            If a node lacks 'id', an edge lacks 'source'/'target', or an edge
            references a node that is not part of 'nodes'.
        ValueError
            If two nodes share the same ID.
        """
        network = cls()

        # Restore the identifying information that to_dict() emits.
        name = network_data.get('name')
        if name:
            network.name = name
        metadata = network_data.get('metadata')
        if isinstance(metadata, dict):
            network.metadata = dict(metadata)

        # Reconstruct haplotypes from nodes
        for node in network_data.get('nodes', []):
            node_id = node['id']
            sequence_data = node.get('sequence', '')
            # to_dict() writes the flag as 'median_vector'; 'is_median' is the
            # key this method historically read, so honour both.
            is_median = node.get('is_median', node.get('median_vector', False))
            sample_ids = node.get('sample_ids', node.get('samples', []))

            seq = Sequence(id=node_id, data=sequence_data)
            haplotype = Haplotype(sequence=seq, sample_ids=sample_ids)
            network.add_haplotype(haplotype, median_vector=is_median)

        # Add edges
        for edge in network_data.get('edges', []):
            network.add_edge(
                edge['source'],
                edge['target'],
                distance=edge.get('distance', 0),
                weight=edge.get('weight', 1.0),
            )

        return network

    @property
    def graph(self) -> nx.Graph:
        """Get the underlying NetworkX graph (the live object, not a copy)."""
        return self._graph

    def add_haplotype(self, haplotype: Haplotype, median_vector: bool = False) -> None:
        """
        Add a haplotype as a node to the network.

        Parameters
        ----------
        haplotype :
            Haplotype object to add. Its ``id`` becomes the node ID.
        median_vector :
            Whether this is an inferred median vector.

        Raises
        ------
        ValueError
            If a node with the same ID already exists.
        """
        node_id = haplotype.id

        if node_id in self._graph:
            raise ValueError(f"Node '{node_id}' already exists in network")

        # Store haplotype reference
        self._haplotype_map[node_id] = haplotype

        # Track median vectors
        if median_vector:
            self._median_vectors.add(node_id)

        # Add node with attributes
        self._graph.add_node(
            node_id,
            haplotype=haplotype,
            frequency=haplotype.frequency,
            sequence=haplotype.data,
            median_vector=median_vector,
            sample_ids=haplotype.sample_ids,
            populations=list(haplotype.get_populations()),
        )

    def remove_haplotype(self, haplotype_id: str) -> None:
        """
        Remove a haplotype node from the network.

        Parameters
        ----------
        haplotype_id :
            ID of haplotype to remove.

        Raises
        ------
        KeyError
            If haplotype not found.
        """
        if haplotype_id not in self._graph:
            raise KeyError(f"Haplotype '{haplotype_id}' not found in network")

        self._graph.remove_node(haplotype_id)
        self._haplotype_map.pop(haplotype_id, None)
        self._median_vectors.discard(haplotype_id)

    def add_edge(
        self,
        source: str,
        target: str,
        distance: int = 0,
        weight: float = 1.0,
        **attributes,
    ) -> None:
        """
        Add an edge between two haplotypes.

        Parameters
        ----------
        source :
            Source haplotype ID.
        target :
            Target haplotype ID.
        distance :
            Genetic distance, stored as the edge's ``'distance'`` attribute.
        weight :
            Edge weight, stored as the edge's ``'weight'`` attribute.
        **attributes :
            Additional edge attributes.

        Raises
        ------
        KeyError
            If either endpoint is not a node of the network.
        """
        if source not in self._graph:
            raise KeyError(f"Source node '{source}' not found in network")
        if target not in self._graph:
            raise KeyError(f"Target node '{target}' not found in network")

        self._graph.add_edge(
            source, target, weight=weight, distance=distance, **attributes
        )

    def remove_edge(self, source: str, target: str) -> None:
        """
        Remove an edge from the network.

        Parameters
        ----------
        source :
            Source haplotype ID.
        target :
            Target haplotype ID.

        Raises
        ------
        KeyError
            If edge not found.
        """
        if not self._graph.has_edge(source, target):
            raise KeyError(f'Edge ({source}, {target}) not found in network')

        self._graph.remove_edge(source, target)

    def get_haplotype(self, haplotype_id: str) -> Haplotype:
        """
        Get haplotype by ID.

        Parameters
        ----------
        haplotype_id :
            Haplotype identifier.

        Returns
        -------
            Haplotype object.

        Raises
        ------
        KeyError
            If haplotype not found.
        """
        try:
            return self._haplotype_map[haplotype_id]
        except KeyError:
            raise KeyError(
                f"Haplotype '{haplotype_id}' not found in network"
            ) from None

    def has_node(self, haplotype_id: str) -> bool:
        """
        Check if node exists in network.

        Parameters
        ----------
        haplotype_id :
            Haplotype identifier.

        Returns
        -------
            True if node exists.
        """
        return haplotype_id in self._graph

    def has_edge(self, source: str, target: str) -> bool:
        """
        Check if edge exists in network.

        Parameters
        ----------
        source :
            Source haplotype ID.
        target :
            Target haplotype ID.

        Returns
        -------
            True if edge exists.
        """
        return self._graph.has_edge(source, target)

    def get_edge_distance(self, source: str, target: str) -> float:
        """
        Get distance for an edge.

        Parameters
        ----------
        source :
            Source haplotype ID.
        target :
            Target haplotype ID.

        Returns
        -------
            Edge distance.

        Raises
        ------
        KeyError
            If edge not found.
        """
        if not self.has_edge(source, target):
            raise KeyError(f'Edge ({source}, {target}) not found')

        return self._graph[source][target]['distance']

    def get_neighbors(self, haplotype_id: str) -> List[str]:
        """
        Get neighboring haplotype IDs.

        Parameters
        ----------
        haplotype_id :
            Haplotype identifier.

        Returns
        -------
            List of neighbor IDs.

        Raises
        ------
        KeyError
            If haplotype not found.
        """
        if not self.has_node(haplotype_id):
            raise KeyError(f"Haplotype '{haplotype_id}' not found")

        return list(self._graph.neighbors(haplotype_id))

    def get_degree(self, haplotype_id: str) -> int:
        """
        Get degree (number of connections) for a node.

        Parameters
        ----------
        haplotype_id :
            Haplotype identifier.

        Returns
        -------
            Node degree.

        Raises
        ------
        KeyError
            If haplotype not found.
        """
        if not self.has_node(haplotype_id):
            raise KeyError(f"Haplotype '{haplotype_id}' not found")

        return self._graph.degree[haplotype_id]

    @property
    def num_nodes(self) -> int:
        """Get number of nodes in network."""
        return self._graph.number_of_nodes()

    @property
    def num_edges(self) -> int:
        """Get number of edges in network."""
        return self._graph.number_of_edges()

    @property
    def nodes(self) -> List[str]:
        """Get list of node IDs."""
        return list(self._graph.nodes())

    @property
    def edges(self) -> List[Tuple[str, str]]:
        """Get list of edges as (source, target) tuples."""
        return list(self._graph.edges())

    @property
    def haplotypes(self) -> List[Haplotype]:
        """Get list of all haplotypes (excluding median vectors)."""
        return [
            hap
            for hap_id, hap in self._haplotype_map.items()
            if hap_id not in self._median_vectors
        ]

    @property
    def median_vector_ids(self) -> List[str]:
        """Get sorted list of median vector node IDs."""
        return sorted(self._median_vectors)

    def is_median_vector(self, node_id: str) -> bool:
        """
        Check if a node is a median vector.

        Parameters
        ----------
        node_id :
            Node identifier.

        Returns
        -------
            True if node is a median vector.
        """
        return node_id in self._median_vectors

    def is_connected(self) -> bool:
        """
        Check if network is fully connected.

        Returns
        -------
            True if all nodes are in one connected component. An empty
            network is reported as not connected.
        """
        # nx.is_connected raises NetworkXPointlessConcept on a graph with no
        # nodes; answer explicitly instead of leaking that exception.
        if self._graph.number_of_nodes() == 0:
            return False

        return nx.is_connected(self._graph)

    def get_connected_components(self) -> List[Set[str]]:
        """
        Get connected components of the network.

        Returns
        -------
            List of sets, each containing node IDs in a component.
        """
        return [set(component) for component in nx.connected_components(self._graph)]

    def calculate_diameter(self) -> int:
        """
        Calculate network diameter (longest shortest path).

        Returns
        -------
            Network diameter, or -1 if the network is empty or not connected.
        """
        if not self.is_connected():
            return -1

        return nx.diameter(self._graph)

    def get_shortest_path(self, source: str, target: str) -> List[str]:
        """
        Find shortest path between two nodes.

        Parameters
        ----------
        source :
            Source node ID.
        target :
            Target node ID.

        Returns
        -------
            List of node IDs in the shortest path.

        Raises
        ------
        nx.NetworkXNoPath
            If no path exists.
        nx.NodeNotFound
            If ``source`` or ``target`` is not in the network.
        """
        return nx.shortest_path(self._graph, source, target)

    def get_shortest_path_length(self, source: str, target: str) -> int:
        """
        Get length of shortest path between two nodes.

        Parameters
        ----------
        source :
            Source node ID.
        target :
            Target node ID.

        Returns
        -------
            Number of edges in shortest path.

        Raises
        ------
        nx.NetworkXNoPath
            If no path exists.
        nx.NodeNotFound
            If ``source`` or ``target`` is not in the network.
        """
        return nx.shortest_path_length(self._graph, source, target)

    def calculate_centrality(self) -> Dict[str, float]:
        """
        Calculate betweenness centrality for all nodes.

        Returns
        -------
            Dictionary mapping node ID to centrality score.
        """
        return nx.betweenness_centrality(self._graph)

    def get_total_samples(self) -> int:
        """
        Get total number of samples represented in the network.

        Returns
        -------
            Sum of ``frequency`` over every stored haplotype.
        """
        return sum(hap.frequency for hap in self._haplotype_map.values())

    def calculate_stats(self) -> NetworkStats:
        """
        Calculate comprehensive network statistics.

        Returns
        -------
            NetworkStats object with network metrics. ``diameter`` is -1 when
            it cannot be computed (empty or disconnected network).
        """
        num_nodes = self.num_nodes
        num_edges = self.num_edges

        # calculate_diameter() already maps empty/disconnected networks to -1;
        # only NetworkX's own failures are translated here, so unrelated bugs
        # are no longer silently swallowed.
        try:
            diameter = self.calculate_diameter()
        except nx.NetworkXException:
            diameter = -1

        # Each edge contributes to the degree of two nodes.
        avg_degree = (2 * num_edges) / num_nodes if num_nodes > 0 else 0.0

        return NetworkStats(
            num_nodes=num_nodes,
            num_edges=num_edges,
            num_haplotypes=len(self.haplotypes),
            num_median_vectors=len(self._median_vectors),
            total_samples=self.get_total_samples(),
            diameter=diameter,
            avg_degree=avg_degree,
            num_components=nx.number_connected_components(self._graph),
        )

    def validate(self) -> None:
        """
        Validate network structure.

        Raises
        ------
        ValueError
            If a node has no haplotype reference or an edge has a negative
            distance.
        """
        # Check all nodes have haplotypes. This also catches nodes created
        # implicitly by adding edges straight to the underlying graph.
        for node_id in self._graph.nodes():
            if node_id not in self._haplotype_map:
                raise ValueError(f"Node '{node_id}' missing haplotype reference")

        # An edge yielded by the graph always has both endpoints in the graph,
        # so only the distance needs checking.
        for source, target, data in self._graph.edges(data=True):
            distance = data.get('distance', 0)
            if distance < 0:
                raise ValueError(
                    f'Edge ({source}, {target}) has negative distance {distance}'
                )

    def to_networkx(self) -> nx.Graph:
        """
        Get a copy of the underlying NetworkX graph.

        Returns
        -------
            NetworkX Graph object; changes to it do not affect this network.
        """
        return self._graph.copy()

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert network to dictionary representation.

        Returns
        -------
            Dictionary with keys 'name', 'nodes', 'edges' and 'metadata'.
        """
        nodes_data = []
        for node_id in self._graph.nodes():
            hap = self._haplotype_map[node_id]
            nodes_data.append(
                {
                    'id': node_id,
                    'frequency': hap.frequency,
                    'sequence': hap.data,
                    'median_vector': self.is_median_vector(node_id),
                    'sample_ids': hap.sample_ids,
                    'populations': list(hap.get_populations()),
                }
            )

        # Fall back to add_edge()'s defaults for edges that were added to the
        # underlying graph without these attributes, matching validate().
        edges_data = [
            {
                'source': source,
                'target': target,
                'distance': data.get('distance', 0),
                'weight': data.get('weight', 1.0),
            }
            for source, target, data in self._graph.edges(data=True)
        ]

        return {
            'name': self.name,
            'nodes': nodes_data,
            'edges': edges_data,
            'metadata': self.metadata,
        }

    def __len__(self) -> int:
        """Return number of nodes in network."""
        return self.num_nodes

    def __str__(self) -> str:
        """Return string representation."""
        # Read the three counts directly; running calculate_stats() here would
        # also compute the diameter just to throw it away.
        return (
            f'{self.name}: {len(self.haplotypes)} haplotypes, '
            f'{len(self._median_vectors)} median vectors, '
            f'{self.num_edges} edges'
        )

    def __repr__(self) -> str:
        """Detailed representation."""
        return f'HaplotypeNetwork(nodes={self.num_nodes}, edges={self.num_edges})'
graph property
graph: Graph

Get the underlying NetworkX graph.

num_nodes property
num_nodes: int

Get number of nodes in network.

num_edges property
num_edges: int

Get number of edges in network.

nodes property
nodes: List[str]

Get list of node IDs.

edges property
edges: List[Tuple[str, str]]

Get list of edges as (source, target) tuples.

haplotypes property
haplotypes: List[Haplotype]

Get list of all haplotypes (excluding median vectors).

median_vector_ids property
median_vector_ids: List[str]

Get list of median vector node IDs.

__init__
__init__(name: Optional[str] = None)

Initialize an empty haplotype network.

Parameters:

Name Type Description Default
name Optional[str]

Optional name for the network.

None
Source code in src/pypopart/core/graph.py
def __init__(self, name: Optional[str] = None):
    """
    Create a network that has no nodes and no edges yet.

    Parameters
    ----------
    name :
        Optional name for the network; ``'HaplotypeNetwork'`` is used when
        no name (or an empty one) is supplied.
    """
    if name:
        self.name = name
    else:
        self.name = 'HaplotypeNetwork'
    self._graph = nx.Graph()
    # Node ID -> Haplotype object.
    self._haplotype_map: Dict[str, Haplotype] = dict()
    # IDs of nodes registered as median vectors.
    self._median_vectors: Set[str] = set()
    self.metadata: Dict[str, Any] = dict()
from_serialized classmethod
from_serialized(network_data: Dict) -> HaplotypeNetwork

Reconstruct a HaplotypeNetwork from serialized data.

Parameters:

Name Type Description Default
network_data Dict

Dictionary containing serialized network data with 'nodes' and 'edges'.

required

Returns:

Type Description
Reconstructed HaplotypeNetwork object.
Source code in src/pypopart/core/graph.py
@classmethod
def from_serialized(cls, network_data: Dict) -> 'HaplotypeNetwork':
    """
    Reconstruct a HaplotypeNetwork from serialized data.

    Accepts the median flag under either ``'is_median'`` or
    ``'median_vector'`` and the sample IDs under either ``'sample_ids'`` or
    ``'samples'``.

    Parameters
    ----------
    network_data :
        Dictionary containing serialized network data with 'nodes' and
        'edges', and optionally 'name' and 'metadata'.

    Returns
    -------
        Reconstructed HaplotypeNetwork object.
    """
    network = cls()

    # Carry over the network's name and metadata when the payload has them.
    name = network_data.get('name')
    if name:
        network.name = name
    metadata = network_data.get('metadata')
    if isinstance(metadata, dict):
        network.metadata = dict(metadata)

    # Reconstruct haplotypes from nodes
    for node in network_data.get('nodes', []):
        node_id = node['id']
        sequence_data = node.get('sequence', '')
        # 'median_vector' is the fallback so payloads keyed that way keep
        # their median flags instead of silently becoming haplotypes.
        is_median = node.get('is_median', node.get('median_vector', False))
        sample_ids = node.get('sample_ids', node.get('samples', []))

        seq = Sequence(id=node_id, data=sequence_data)
        haplotype = Haplotype(sequence=seq, sample_ids=sample_ids)
        network.add_haplotype(haplotype, median_vector=is_median)

    # Add edges
    for edge in network_data.get('edges', []):
        network.add_edge(
            edge['source'],
            edge['target'],
            distance=edge.get('distance', 0),
            weight=edge.get('weight', 1.0),
        )

    return network
add_haplotype
add_haplotype(
    haplotype: Haplotype, median_vector: bool = False
) -> None

Add a haplotype as a node to the network.

Parameters:

Name Type Description Default
haplotype Haplotype

Haplotype object to add.

required
median_vector bool

Whether this is an inferred median vector.

False
Source code in src/pypopart/core/graph.py
def add_haplotype(self, haplotype: Haplotype, median_vector: bool = False) -> None:
    """
    Register a haplotype as a new node of the network.

    Parameters
    ----------
    haplotype :
        Haplotype object to add; its ``id`` is used as the node ID.
    median_vector :
        Whether this is an inferred median vector.

    Raises
    ------
    ValueError
        If a node with the same ID is already present.
    """
    hap_id = haplotype.id
    if hap_id in self._graph:
        raise ValueError(f"Node '{hap_id}' already exists in network")

    # Keep the object itself so it can be looked up by ID later.
    self._haplotype_map[hap_id] = haplotype
    if median_vector:
        self._median_vectors.add(hap_id)

    # Attributes are gathered in the same order the node stores them.
    node_attrs = {
        'haplotype': haplotype,
        'frequency': haplotype.frequency,
        'sequence': haplotype.data,
        'median_vector': median_vector,
        'sample_ids': haplotype.sample_ids,
        'populations': list(haplotype.get_populations()),
    }
    self._graph.add_node(hap_id, **node_attrs)
remove_haplotype
remove_haplotype(haplotype_id: str) -> None

Remove a haplotype node from the network.

Parameters:

Name Type Description Default
haplotype_id str

ID of haplotype to remove.

required
Raises:

Type Description
KeyError

If haplotype not found.
Source code in src/pypopart/core/graph.py
def remove_haplotype(self, haplotype_id: str) -> None:
    """
    Delete a haplotype node and its bookkeeping entries.

    Parameters
    ----------
    haplotype_id :
        ID of haplotype to remove.

    Raises
    ------
    KeyError
        If haplotype not found.
    """
    graph = self._graph
    if haplotype_id not in graph:
        raise KeyError(f"Haplotype '{haplotype_id}' not found in network")

    graph.remove_node(haplotype_id)
    # Both lookups tolerate a missing entry, so no further checks are needed.
    self._haplotype_map.pop(haplotype_id, None)
    self._median_vectors.discard(haplotype_id)
add_edge
add_edge(
    source: str,
    target: str,
    distance: int = 0,
    weight: float = 1.0,
    **attributes
) -> None

Add an edge between two haplotypes.

Parameters:

Name Type Description Default
source str

Source haplotype ID.

required
target str

Target haplotype ID.

required
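distance int

Genetic distance between the two haplotypes, stored as the edge's 'distance' attribute.

0
weight float

Edge weight, stored as the edge's 'weight' attribute.

1.0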
**attributes

Additional edge attributes.

{}
Source code in src/pypopart/core/graph.py
def add_edge(
    self,
    source: str,
    target: str,
    distance: int = 0,
    weight: float = 1.0,
    **attributes,
) -> None:
    """
    Connect two existing haplotype nodes.

    Parameters
    ----------
    source :
        Source haplotype ID.
    target :
        Target haplotype ID.
    distance :
        Genetic distance, stored as the edge's ``'distance'`` attribute.
    weight :
        Edge weight, stored as the edge's ``'weight'`` attribute.
    **attributes :
        Additional edge attributes.

    Raises
    ------
    KeyError
        If ``source`` or ``target`` is not a node of the network.
    """
    graph = self._graph
    if source not in graph:
        raise KeyError(f"Source node '{source}' not found in network")
    if target not in graph:
        raise KeyError(f"Target node '{target}' not found in network")

    edge_attrs = {'weight': weight, 'distance': distance}
    edge_attrs.update(attributes)
    graph.add_edge(source, target, **edge_attrs)
remove_edge
remove_edge(source: str, target: str) -> None

Remove an edge from the network.

Parameters:

Name Type Description Default
source str

Source haplotype ID.

required
target str

Target haplotype ID.

required
Raises:

Type Description
KeyError

If edge not found.
Source code in src/pypopart/core/graph.py
def remove_edge(self, source: str, target: str) -> None:
    """
    Delete the edge between two haplotypes.

    Parameters
    ----------
    source :
        Source haplotype ID.
    target :
        Target haplotype ID.

    Raises
    ------
    KeyError
        If edge not found.
    """
    graph = self._graph
    edge_present = graph.has_edge(source, target)
    if edge_present:
        graph.remove_edge(source, target)
        return

    raise KeyError(f'Edge ({source}, {target}) not found in network')
get_haplotype
get_haplotype(haplotype_id: str) -> Haplotype
Get haplotype by ID.

Returns:

Type Description
Haplotype object.

Raises: KeyError if the haplotype is not found.

Source code in src/pypopart/core/graph.py
def get_haplotype(self, haplotype_id: str) -> Haplotype:
    """
    Look up the haplotype stored under an ID.

    Parameters
    ----------
    haplotype_id :
        Haplotype identifier.

    Returns
    -------
        Haplotype object.

    Raises
    ------
    KeyError
        If haplotype not found.
    """
    known = self._haplotype_map
    if haplotype_id in known:
        return known[haplotype_id]

    raise KeyError(f"Haplotype '{haplotype_id}' not found in network")
has_node
has_node(haplotype_id: str) -> bool
Check if node exists in network.

Returns:

Type Description
True if node exists.
Source code in src/pypopart/core/graph.py
def has_node(self, haplotype_id: str) -> bool:
    """
    Tell whether the network contains a node with the given ID.

    Parameters
    ----------
    haplotype_id :
        Haplotype identifier.

    Returns
    -------
        True if node exists.
    """
    graph = self._graph
    return haplotype_id in graph
has_edge
has_edge(source: str, target: str) -> bool
Check if edge exists in network.

Returns:

Type Description
True if edge exists.
Source code in src/pypopart/core/graph.py
def has_edge(self, source: str, target: str) -> bool:
    """
    Tell whether an edge joins the two given nodes.

    Parameters
    ----------
    source :
        Source haplotype ID.
    target :
        Target haplotype ID.

    Returns
    -------
        True if edge exists.
    """
    edge_present = self._graph.has_edge(source, target)
    return edge_present
get_edge_distance
get_edge_distance(source: str, target: str) -> float
Get distance for an edge.

Returns:

Type Description
Edge distance.

Raises: KeyError if the edge is not found.

Source code in src/pypopart/core/graph.py
def get_edge_distance(self, source: str, target: str) -> float:
    """
    Read the ``'distance'`` attribute of an edge.

    Parameters
    ----------
    source :
        Source haplotype ID.
    target :
        Target haplotype ID.

    Returns
    -------
        Edge distance.

    Raises
    ------
    KeyError
        If edge not found.
    """
    if self.has_edge(source, target):
        edge_data = self._graph[source][target]
        return edge_data['distance']

    raise KeyError(f'Edge ({source}, {target}) not found')
get_neighbors
get_neighbors(haplotype_id: str) -> List[str]
Get neighboring haplotype IDs.

Returns:

Type Description
List of neighbor IDs.

Raises: KeyError if the haplotype is not found.

Source code in src/pypopart/core/graph.py
def get_neighbors(self, haplotype_id: str) -> List[str]:
    """
    List the IDs of the nodes adjacent to a haplotype.

    Parameters
    ----------
    haplotype_id :
        Haplotype identifier.

    Returns
    -------
        List of neighbor IDs.

    Raises
    ------
    KeyError
        If haplotype not found.
    """
    if self.has_node(haplotype_id):
        return [neighbor for neighbor in self._graph.neighbors(haplotype_id)]

    raise KeyError(f"Haplotype '{haplotype_id}' not found")
get_degree
get_degree(haplotype_id: str) -> int
Get degree (number of connections) for a node.

Returns:

Type Description
Node degree.
Source code in src/pypopart/core/graph.py
def get_degree(self, haplotype_id: str) -> int:
    """
    Count the connections of a node.

    Parameters
    ----------
    haplotype_id :
        Haplotype identifier.

    Returns
    -------
        Node degree.

    Raises
    ------
    KeyError
        If haplotype not found.
    """
    if self.has_node(haplotype_id):
        degree_view = self._graph.degree
        return degree_view[haplotype_id]

    raise KeyError(f"Haplotype '{haplotype_id}' not found")
is_median_vector
is_median_vector(node_id: str) -> bool
Check if a node is a median vector.

Returns:

Type Description
True if node is a median vector.
Source code in src/pypopart/core/graph.py
def is_median_vector(self, node_id: str) -> bool:
    """
    Tell whether a node was registered as a median vector.

    Parameters
    ----------
    node_id :
        Node identifier.

    Returns
    -------
        True if node is a median vector.
    """
    median_ids = self._median_vectors
    return node_id in median_ids
is_connected
is_connected() -> bool

Check if network is fully connected.

Returns:

Type Description
True if all nodes are in one connected component.
Source code in src/pypopart/core/graph.py
def is_connected(self) -> bool:
    """
    Check if network is fully connected.

    Returns
    -------
        True if all nodes are in one connected component. An empty network
        is reported as not connected.
    """
    # nx.is_connected raises NetworkXPointlessConcept for a graph without
    # nodes; return a plain answer instead of leaking that exception.
    if self._graph.number_of_nodes() == 0:
        return False

    return nx.is_connected(self._graph)
get_connected_components
get_connected_components() -> List[Set[str]]

Get connected components of the network.

Returns:

Type Description
List of sets, each containing node IDs in a component.
Source code in src/pypopart/core/graph.py
def get_connected_components(self) -> List[Set[str]]:
    """
    Split the network into its connected components.

    Returns
    -------
        List of sets, each containing node IDs in a component.
    """
    components = nx.connected_components(self._graph)
    return list(map(set, components))
calculate_diameter
calculate_diameter() -> int

Calculate network diameter (longest shortest path).

Returns:

Type Description
Network diameter, or -1 if not connected.
Source code in src/pypopart/core/graph.py
def calculate_diameter(self) -> int:
    """
    Calculate network diameter (longest shortest path).

    Returns
    -------
        Network diameter, or -1 if the network is empty or not connected.
    """
    # Connectivity is undefined for a graph without nodes (NetworkX raises
    # NetworkXPointlessConcept), so handle the empty network up front.
    if self._graph.number_of_nodes() == 0:
        return -1

    if not self.is_connected():
        return -1

    return nx.diameter(self._graph)
get_shortest_path
get_shortest_path(source: str, target: str) -> List[str]
Find shortest path between two nodes.

Returns:

Type Description
List of node IDs in the shortest path.

Raises: nx.NetworkXNoPath if no path exists.

Source code in src/pypopart/core/graph.py
def get_shortest_path(self, source: str, target: str) -> List[str]:
    """
    Find the shortest path between two nodes.

    Delegates to ``networkx.shortest_path`` on the underlying graph; no
    edge-weight attribute is passed.

    Parameters
    ----------
        source :
            Source node ID.
        target :
            Target node ID.

    Returns
    -------
        List of node IDs in the shortest path.

    Raises
    ------
        nx.NetworkXNoPath: If no path exists
    """
    return nx.shortest_path(self._graph, source=source, target=target)
get_shortest_path_length
get_shortest_path_length(source: str, target: str) -> int
Get length of shortest path between two nodes.

Returns:

Type Description
Number of edges in shortest path.

Raises: nx.NetworkXNoPath if no path exists between the two nodes.

Source code in src/pypopart/core/graph.py
def get_shortest_path_length(self, source: str, target: str) -> int:
    """
    Get the length of the shortest path between two nodes.

    Delegates to ``networkx.shortest_path_length`` on the underlying
    graph; no edge-weight attribute is passed.

    Parameters
    ----------
        source :
            Source node ID.
        target :
            Target node ID.

    Returns
    -------
        Number of edges in shortest path.

    Raises
    ------
        nx.NetworkXNoPath: If no path exists
    """
    return nx.shortest_path_length(self._graph, source=source, target=target)
calculate_centrality
calculate_centrality() -> Dict[str, float]

Calculate betweenness centrality for all nodes.

Returns:

Type Description
Dictionary mapping node ID to centrality score.
Source code in src/pypopart/core/graph.py
def calculate_centrality(self) -> Dict[str, float]:
    """
    Compute betweenness centrality for every node in the network.

    Delegates to ``networkx.betweenness_centrality`` with its default
    arguments.

    Returns
    -------
        Dictionary mapping node ID to centrality score.
    """
    scores = nx.betweenness_centrality(self._graph)
    return scores
get_total_samples
get_total_samples() -> int

Get total number of samples represented in the network.

Returns:

Type Description
Total sample count.
Source code in src/pypopart/core/graph.py
def get_total_samples(self) -> int:
    """
    Add up the frequencies of every haplotype stored in the network.

    Returns
    -------
        Total sample count (0 when the network holds no haplotypes).
    """
    total = 0
    for haplotype in self._haplotype_map.values():
        total += haplotype.frequency
    return total
calculate_stats
calculate_stats() -> NetworkStats

Calculate comprehensive network statistics.

Returns:

Type Description
NetworkStats object with network metrics.
Source code in src/pypopart/core/graph.py
def calculate_stats(self) -> NetworkStats:
    """
    Calculate comprehensive network statistics.

    Returns
    -------
        NetworkStats object with network metrics. ``diameter`` is -1 when
        it cannot be computed (for example for a disconnected network),
        and ``avg_degree`` is 0.0 for a network without nodes.
    """
    num_nodes = self.num_nodes
    num_edges = self.num_edges
    num_haplotypes = len(self.haplotypes)
    num_median = len(self._median_vectors)
    total_samples = self.get_total_samples()

    # Only graph-level failures reported by networkx (such as the
    # NetworkXPointlessConcept raised for a graph with no nodes) are mapped
    # to the -1 sentinel; unrelated errors are left to surface instead of
    # being silently swallowed.
    try:
        diameter = self.calculate_diameter()
    except nx.NetworkXException:
        diameter = -1

    # Every edge contributes to the degree of two nodes.
    avg_degree = (2 * num_edges) / num_nodes if num_nodes > 0 else 0.0

    num_components = nx.number_connected_components(self._graph)

    return NetworkStats(
        num_nodes=num_nodes,
        num_edges=num_edges,
        num_haplotypes=num_haplotypes,
        num_median_vectors=num_median,
        total_samples=total_samples,
        diameter=diameter,
        avg_degree=avg_degree,
        num_components=num_components,
    )
validate
validate() -> None

Validate network structure.

Raises:

Type Description
ValueError: If network is invalid
Source code in src/pypopart/core/graph.py
def validate(self) -> None:
    """
    Check the structural consistency of the network.

    Three passes are made, in this order: every graph node must have an
    entry in the haplotype map, every edge endpoint must be a node of the
    graph, and every edge ``'distance'`` attribute (treated as 0 when
    absent) must be non-negative. The first violation found is reported.

    Raises
    ------
        ValueError: If network is invalid
    """
    graph = self._graph
    known_haplotypes = self._haplotype_map

    # Pass 1: each node needs a haplotype behind it.
    for node_id in graph.nodes():
        if node_id in known_haplotypes:
            continue
        raise ValueError(f"Node '{node_id}' missing haplotype reference")

    # Pass 2: both ends of each edge must be nodes of the graph.
    for source, target in graph.edges():
        if source not in graph:
            raise ValueError(f"Edge references non-existent source '{source}'")
        if target not in graph:
            raise ValueError(f"Edge references non-existent target '{target}'")

    # Pass 3: distances may be missing (counted as 0) but never negative.
    for source, target, attributes in graph.edges(data=True):
        distance = attributes.get('distance', 0)
        if not distance < 0:
            continue
        raise ValueError(
            f'Edge ({source}, {target}) has negative distance {distance}'
        )
to_networkx
to_networkx() -> nx.Graph

Get a copy of the underlying NetworkX graph.

Returns:

Type Description
NetworkX Graph object.
Source code in src/pypopart/core/graph.py
def to_networkx(self) -> nx.Graph:
    """
    Return a copy of the underlying NetworkX graph.

    The graph is duplicated with its own ``copy()`` method, so the object
    handed back is not the network's internal graph instance.

    Returns
    -------
        NetworkX Graph object.
    """
    duplicate = self._graph.copy()
    return duplicate
to_dict
to_dict() -> Dict[str, Any]

Convert network to dictionary representation.

Returns:

Type Description
Dictionary with network data.
Source code in src/pypopart/core/graph.py
def to_dict(self) -> Dict[str, Any]:
    """
    Convert network to dictionary representation.

    Returns
    -------
        Dictionary with the keys ``'name'``, ``'nodes'``, ``'edges'`` and
        ``'metadata'``. Each node entry holds ``'id'``, ``'frequency'``,
        ``'sequence'``, ``'median_vector'``, ``'sample_ids'`` and
        ``'populations'``; each edge entry holds ``'source'``,
        ``'target'`` and ``'distance'``.

    Notes
    -----
        ``'sample_ids'`` and ``'metadata'`` are the objects held by the
        haplotype / network themselves, not copies.
    """
    nodes_data = []
    for node_id in self._graph.nodes():
        hap = self._haplotype_map[node_id]
        nodes_data.append(
            {
                'id': node_id,
                'frequency': hap.frequency,
                'sequence': hap.data,
                'median_vector': self.is_median_vector(node_id),
                'sample_ids': hap.sample_ids,
                'populations': list(hap.get_populations()),
            }
        )

    # An edge without a 'distance' attribute is accepted by validate()
    # (which treats it as 0), so export it the same way rather than
    # failing with a KeyError.
    edges_data = [
        {'source': source, 'target': target, 'distance': data.get('distance', 0)}
        for source, target, data in self._graph.edges(data=True)
    ]

    return {
        'name': self.name,
        'nodes': nodes_data,
        'edges': edges_data,
        'metadata': self.metadata,
    }
__len__
__len__() -> int

Return number of nodes in network.

Source code in src/pypopart/core/graph.py
def __len__(self) -> int:
    """Return the network's node count, as reported by ``num_nodes``."""
    node_count = self.num_nodes
    return node_count
__str__
__str__() -> str

Return string representation.

Source code in src/pypopart/core/graph.py
def __str__(self) -> str:
    """
    Return a one-line summary: name, haplotype count, median-vector count
    and edge count.

    The three counts are read directly from the network instead of going
    through ``calculate_stats()``, which would additionally compute the
    diameter, component count and sample total that this summary never
    displays.
    """
    return (
        f'{self.name}: {len(self.haplotypes)} haplotypes, '
        f'{len(self._median_vectors)} median vectors, '
        f'{self.num_edges} edges'
    )
__repr__
__repr__() -> str

Detailed representation.

Source code in src/pypopart/core/graph.py
def __repr__(self) -> str:
    """Return a compact representation showing node and edge counts."""
    node_count, edge_count = self.num_nodes, self.num_edges
    return f'HaplotypeNetwork(nodes={node_count}, edges={edge_count})'