This repository has been archived on 2023-03-25. You can view files and clone it, but cannot push or open issues or pull requests.
mightyscape-1.1-deprecated/extensions/networkx/algorithms/tree/mst.py
2020-07-30 01:16:18 +02:00

612 lines
21 KiB
Python

# -*- coding: utf-8 -*-
# Copyright (C) 2017 NetworkX Developers
# Aric Hagberg <hagberg@lanl.gov>
# Dan Schult <dschult@colgate.edu>
# Pieter Swart <swart@lanl.gov>
# Loïc Séguin-C. <loicseguin@gmail.com>
# All rights reserved.
# BSD license.
"""
Algorithms for calculating min/max spanning trees/forests.
"""
from heapq import heappop, heappush
from operator import itemgetter
from itertools import count
from math import isnan
import networkx as nx
from networkx.utils import UnionFind, not_implemented_for
__all__ = [
'minimum_spanning_edges', 'maximum_spanning_edges',
'minimum_spanning_tree', 'maximum_spanning_tree',
]
@not_implemented_for('multigraph')
def boruvka_mst_edges(G, minimum=True, weight='weight',
keys=False, data=True, ignore_nan=False):
"""Iterate over edges of a Borůvka's algorithm min/max spanning tree.
Parameters
----------
G : NetworkX Graph
The edges of `G` must have distinct weights,
otherwise the edges may not form a tree.
minimum : bool (default: True)
Find the minimum (True) or maximum (False) spanning tree.
weight : string (default: 'weight')
The name of the edge attribute holding the edge weights.
keys : bool (default: True)
This argument is ignored since this function is not
implemented for multigraphs; it exists only for consistency
with the other minimum spanning tree functions.
data : bool (default: True)
Flag for whether to yield edge attribute dicts.
If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
If False, yield edges `(u, v)`.
ignore_nan : bool (default: False)
If a NaN is found as an edge weight normally an exception is raised.
If `ignore_nan is True` then that edge is ignored instead.
"""
# Initialize a forest, assuming initially that it is the discrete
# partition of the nodes of the graph.
forest = UnionFind(G)
def best_edge(component):
"""Returns the optimum (minimum or maximum) edge on the edge
boundary of the given set of nodes.
A return value of ``None`` indicates an empty boundary.
"""
sign = 1 if minimum else -1
minwt = float('inf')
boundary = None
for e in nx.edge_boundary(G, component, data=True):
wt = e[-1].get(weight, 1) * sign
if isnan(wt):
if ignore_nan:
continue
msg = "NaN found as an edge weight. Edge %s"
raise ValueError(msg % (e,))
if wt < minwt:
minwt = wt
boundary = e
return boundary
# Determine the optimum edge in the edge boundary of each component
# in the forest.
best_edges = (best_edge(component) for component in forest.to_sets())
best_edges = [edge for edge in best_edges if edge is not None]
# If each entry was ``None``, that means the graph was disconnected,
# so we are done generating the forest.
while best_edges:
# Determine the optimum edge in the edge boundary of each
# component in the forest.
#
# This must be a sequence, not an iterator. In this list, the
# same edge may appear twice, in different orientations (but
# that's okay, since a union operation will be called on the
# endpoints the first time it is seen, but not the second time).
#
# Any ``None`` indicates that the edge boundary for that
# component was empty, so that part of the forest has been
# completed.
#
# TODO This can be parallelized, both in the outer loop over
# each component in the forest and in the computation of the
# minimum. (Same goes for the identical lines outside the loop.)
best_edges = (best_edge(component) for component in forest.to_sets())
best_edges = [edge for edge in best_edges if edge is not None]
# Join trees in the forest using the best edges, and yield that
# edge, since it is part of the spanning tree.
#
# TODO This loop can be parallelized, to an extent (the union
# operation must be atomic).
for u, v, d in best_edges:
if forest[u] != forest[v]:
if data:
yield u, v, d
else:
yield u, v
forest.union(u, v)
def kruskal_mst_edges(G, minimum, weight='weight',
keys=True, data=True, ignore_nan=False):
"""Iterate over edges of a Kruskal's algorithm min/max spanning tree.
Parameters
----------
G : NetworkX Graph
The graph holding the tree of interest.
minimum : bool (default: True)
Find the minimum (True) or maximum (False) spanning tree.
weight : string (default: 'weight')
The name of the edge attribute holding the edge weights.
keys : bool (default: True)
If `G` is a multigraph, `keys` controls whether edge keys ar yielded.
Otherwise `keys` is ignored.
data : bool (default: True)
Flag for whether to yield edge attribute dicts.
If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
If False, yield edges `(u, v)`.
ignore_nan : bool (default: False)
If a NaN is found as an edge weight normally an exception is raised.
If `ignore_nan is True` then that edge is ignored instead.
"""
subtrees = UnionFind()
if G.is_multigraph():
edges = G.edges(keys=True, data=True)
def filter_nan_edges(edges=edges, weight=weight):
sign = 1 if minimum else -1
for u, v, k, d in edges:
wt = d.get(weight, 1) * sign
if isnan(wt):
if ignore_nan:
continue
msg = "NaN found as an edge weight. Edge %s"
raise ValueError(msg % ((u, v, k, d),))
yield wt, u, v, k, d
else:
edges = G.edges(data=True)
def filter_nan_edges(edges=edges, weight=weight):
sign = 1 if minimum else -1
for u, v, d in edges:
wt = d.get(weight, 1) * sign
if isnan(wt):
if ignore_nan:
continue
msg = "NaN found as an edge weight. Edge %s"
raise ValueError(msg % ((u, v, d),))
yield wt, u, v, d
edges = sorted(filter_nan_edges(), key=itemgetter(0))
# Multigraphs need to handle edge keys in addition to edge data.
if G.is_multigraph():
for wt, u, v, k, d in edges:
if subtrees[u] != subtrees[v]:
if keys:
if data:
yield u, v, k, d
else:
yield u, v, k
else:
if data:
yield u, v, d
else:
yield u, v
subtrees.union(u, v)
else:
for wt, u, v, d in edges:
if subtrees[u] != subtrees[v]:
if data:
yield (u, v, d)
else:
yield (u, v)
subtrees.union(u, v)
def prim_mst_edges(G, minimum, weight='weight',
keys=True, data=True, ignore_nan=False):
"""Iterate over edges of Prim's algorithm min/max spanning tree.
Parameters
----------
G : NetworkX Graph
The graph holding the tree of interest.
minimum : bool (default: True)
Find the minimum (True) or maximum (False) spanning tree.
weight : string (default: 'weight')
The name of the edge attribute holding the edge weights.
keys : bool (default: True)
If `G` is a multigraph, `keys` controls whether edge keys ar yielded.
Otherwise `keys` is ignored.
data : bool (default: True)
Flag for whether to yield edge attribute dicts.
If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
If False, yield edges `(u, v)`.
ignore_nan : bool (default: False)
If a NaN is found as an edge weight normally an exception is raised.
If `ignore_nan is True` then that edge is ignored instead.
"""
is_multigraph = G.is_multigraph()
push = heappush
pop = heappop
nodes = set(G)
c = count()
sign = 1 if minimum else -1
while nodes:
u = nodes.pop()
frontier = []
visited = {u}
if is_multigraph:
for v, keydict in G.adj[u].items():
for k, d in keydict.items():
wt = d.get(weight, 1) * sign
if isnan(wt):
if ignore_nan:
continue
msg = "NaN found as an edge weight. Edge %s"
raise ValueError(msg % ((u, v, k, d),))
push(frontier, (wt, next(c), u, v, k, d))
else:
for v, d in G.adj[u].items():
wt = d.get(weight, 1) * sign
if isnan(wt):
if ignore_nan:
continue
msg = "NaN found as an edge weight. Edge %s"
raise ValueError(msg % ((u, v, d),))
push(frontier, (wt, next(c), u, v, d))
while frontier:
if is_multigraph:
W, _, u, v, k, d = pop(frontier)
else:
W, _, u, v, d = pop(frontier)
if v in visited or v not in nodes:
continue
# Multigraphs need to handle edge keys in addition to edge data.
if is_multigraph and keys:
if data:
yield u, v, k, d
else:
yield u, v, k
else:
if data:
yield u, v, d
else:
yield u, v
# update frontier
visited.add(v)
nodes.discard(v)
if is_multigraph:
for w, keydict in G.adj[v].items():
if w in visited:
continue
for k2, d2 in keydict.items():
new_weight = d2.get(weight, 1) * sign
push(frontier, (new_weight, next(c), v, w, k2, d2))
else:
for w, d2 in G.adj[v].items():
if w in visited:
continue
new_weight = d2.get(weight, 1) * sign
push(frontier, (new_weight, next(c), v, w, d2))
ALGORITHMS = {
'boruvka': boruvka_mst_edges,
u'borůvka': boruvka_mst_edges,
'kruskal': kruskal_mst_edges,
'prim': prim_mst_edges
}
@not_implemented_for('directed')
def minimum_spanning_edges(G, algorithm='kruskal', weight='weight',
keys=True, data=True, ignore_nan=False):
"""Generate edges in a minimum spanning forest of an undirected
weighted graph.
A minimum spanning tree is a subgraph of the graph (a tree)
with the minimum sum of edge weights. A spanning forest is a
union of the spanning trees for each connected component of the graph.
Parameters
----------
G : undirected Graph
An undirected graph. If `G` is connected, then the algorithm finds a
spanning tree. Otherwise, a spanning forest is found.
algorithm : string
The algorithm to use when finding a minimum spanning tree. Valid
choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'.
weight : string
Edge data key to use for weight (default 'weight').
keys : bool
Whether to yield edge key in multigraphs in addition to the edge.
If `G` is not a multigraph, this is ignored.
data : bool, optional
If True yield the edge data along with the edge.
ignore_nan : bool (default: False)
If a NaN is found as an edge weight normally an exception is raised.
If `ignore_nan is True` then that edge is ignored instead.
Returns
-------
edges : iterator
An iterator over edges in a maximum spanning tree of `G`.
Edges connecting nodes `u` and `v` are represented as tuples:
`(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)`
If `G` is a multigraph, `keys` indicates whether the edge key `k` will
be reported in the third position in the edge tuple. `data` indicates
whether the edge datadict `d` will appear at the end of the edge tuple.
If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True
or `(u, v)` if `data` is False.
Examples
--------
>>> from networkx.algorithms import tree
Find minimum spanning edges by Kruskal's algorithm
>>> G = nx.cycle_graph(4)
>>> G.add_edge(0, 3, weight=2)
>>> mst = tree.minimum_spanning_edges(G, algorithm='kruskal', data=False)
>>> edgelist = list(mst)
>>> sorted(sorted(e) for e in edgelist)
[[0, 1], [1, 2], [2, 3]]
Find minimum spanning edges by Prim's algorithm
>>> G = nx.cycle_graph(4)
>>> G.add_edge(0, 3, weight=2)
>>> mst = tree.minimum_spanning_edges(G, algorithm='prim', data=False)
>>> edgelist = list(mst)
>>> sorted(sorted(e) for e in edgelist)
[[0, 1], [1, 2], [2, 3]]
Notes
-----
For Borůvka's algorithm, each edge must have a weight attribute, and
each edge weight must be distinct.
For the other algorithms, if the graph edges do not have a weight
attribute a default weight of 1 will be used.
Modified code from David Eppstein, April 2006
http://www.ics.uci.edu/~eppstein/PADS/
"""
try:
algo = ALGORITHMS[algorithm]
except KeyError:
msg = '{} is not a valid choice for an algorithm.'.format(algorithm)
raise ValueError(msg)
return algo(G, minimum=True, weight=weight, keys=keys, data=data,
ignore_nan=ignore_nan)
@not_implemented_for('directed')
def maximum_spanning_edges(G, algorithm='kruskal', weight='weight',
keys=True, data=True, ignore_nan=False):
"""Generate edges in a maximum spanning forest of an undirected
weighted graph.
A maximum spanning tree is a subgraph of the graph (a tree)
with the maximum possible sum of edge weights. A spanning forest is a
union of the spanning trees for each connected component of the graph.
Parameters
----------
G : undirected Graph
An undirected graph. If `G` is connected, then the algorithm finds a
spanning tree. Otherwise, a spanning forest is found.
algorithm : string
The algorithm to use when finding a maximum spanning tree. Valid
choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'.
weight : string
Edge data key to use for weight (default 'weight').
keys : bool
Whether to yield edge key in multigraphs in addition to the edge.
If `G` is not a multigraph, this is ignored.
data : bool, optional
If True yield the edge data along with the edge.
ignore_nan : bool (default: False)
If a NaN is found as an edge weight normally an exception is raised.
If `ignore_nan is True` then that edge is ignored instead.
Returns
-------
edges : iterator
An iterator over edges in a maximum spanning tree of `G`.
Edges connecting nodes `u` and `v` are represented as tuples:
`(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)`
If `G` is a multigraph, `keys` indicates whether the edge key `k` will
be reported in the third position in the edge tuple. `data` indicates
whether the edge datadict `d` will appear at the end of the edge tuple.
If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True
or `(u, v)` if `data` is False.
Examples
--------
>>> from networkx.algorithms import tree
Find maximum spanning edges by Kruskal's algorithm
>>> G = nx.cycle_graph(4)
>>> G.add_edge(0, 3, weight=2)
>>> mst = tree.maximum_spanning_edges(G, algorithm='kruskal', data=False)
>>> edgelist = list(mst)
>>> sorted(sorted(e) for e in edgelist)
[[0, 1], [0, 3], [1, 2]]
Find maximum spanning edges by Prim's algorithm
>>> G = nx.cycle_graph(4)
>>> G.add_edge(0, 3, weight=2) # assign weight 2 to edge 0-3
>>> mst = tree.maximum_spanning_edges(G, algorithm='prim', data=False)
>>> edgelist = list(mst)
>>> sorted(sorted(e) for e in edgelist)
[[0, 1], [0, 3], [2, 3]]
Notes
-----
For Borůvka's algorithm, each edge must have a weight attribute, and
each edge weight must be distinct.
For the other algorithms, if the graph edges do not have a weight
attribute a default weight of 1 will be used.
Modified code from David Eppstein, April 2006
http://www.ics.uci.edu/~eppstein/PADS/
"""
try:
algo = ALGORITHMS[algorithm]
except KeyError:
msg = '{} is not a valid choice for an algorithm.'.format(algorithm)
raise ValueError(msg)
return algo(G, minimum=False, weight=weight, keys=keys, data=data,
ignore_nan=ignore_nan)
def minimum_spanning_tree(G, weight='weight', algorithm='kruskal',
ignore_nan=False):
"""Returns a minimum spanning tree or forest on an undirected graph `G`.
Parameters
----------
G : undirected graph
An undirected graph. If `G` is connected, then the algorithm finds a
spanning tree. Otherwise, a spanning forest is found.
weight : str
Data key to use for edge weights.
algorithm : string
The algorithm to use when finding a minimum spanning tree. Valid
choices are 'kruskal', 'prim', or 'boruvka'. The default is
'kruskal'.
ignore_nan : bool (default: False)
If a NaN is found as an edge weight normally an exception is raised.
If `ignore_nan is True` then that edge is ignored instead.
Returns
-------
G : NetworkX Graph
A minimum spanning tree or forest.
Examples
--------
>>> G = nx.cycle_graph(4)
>>> G.add_edge(0, 3, weight=2)
>>> T = nx.minimum_spanning_tree(G)
>>> sorted(T.edges(data=True))
[(0, 1, {}), (1, 2, {}), (2, 3, {})]
Notes
-----
For Borůvka's algorithm, each edge must have a weight attribute, and
each edge weight must be distinct.
For the other algorithms, if the graph edges do not have a weight
attribute a default weight of 1 will be used.
There may be more than one tree with the same minimum or maximum weight.
See :mod:`networkx.tree.recognition` for more detailed definitions.
Isolated nodes with self-loops are in the tree as edgeless isolated nodes.
"""
edges = minimum_spanning_edges(G, algorithm, weight, keys=True,
data=True, ignore_nan=ignore_nan)
T = G.__class__() # Same graph class as G
T.graph.update(G.graph)
T.add_nodes_from(G.nodes.items())
T.add_edges_from(edges)
return T
def maximum_spanning_tree(G, weight='weight', algorithm='kruskal',
ignore_nan=False):
"""Returns a maximum spanning tree or forest on an undirected graph `G`.
Parameters
----------
G : undirected graph
An undirected graph. If `G` is connected, then the algorithm finds a
spanning tree. Otherwise, a spanning forest is found.
weight : str
Data key to use for edge weights.
algorithm : string
The algorithm to use when finding a maximum spanning tree. Valid
choices are 'kruskal', 'prim', or 'boruvka'. The default is
'kruskal'.
ignore_nan : bool (default: False)
If a NaN is found as an edge weight normally an exception is raised.
If `ignore_nan is True` then that edge is ignored instead.
Returns
-------
G : NetworkX Graph
A maximum spanning tree or forest.
Examples
--------
>>> G = nx.cycle_graph(4)
>>> G.add_edge(0, 3, weight=2)
>>> T = nx.maximum_spanning_tree(G)
>>> sorted(T.edges(data=True))
[(0, 1, {}), (0, 3, {'weight': 2}), (1, 2, {})]
Notes
-----
For Borůvka's algorithm, each edge must have a weight attribute, and
each edge weight must be distinct.
For the other algorithms, if the graph edges do not have a weight
attribute a default weight of 1 will be used.
There may be more than one tree with the same minimum or maximum weight.
See :mod:`networkx.tree.recognition` for more detailed definitions.
Isolated nodes with self-loops are in the tree as edgeless isolated nodes.
"""
edges = maximum_spanning_edges(G, algorithm, weight, keys=True,
data=True, ignore_nan=ignore_nan)
edges = list(edges)
T = G.__class__() # Same graph class as G
T.graph.update(G.graph)
T.add_nodes_from(G.nodes.items())
T.add_edges_from(edges)
return T