394 lines
13 KiB
Python
394 lines
13 KiB
Python
# coding=utf8
|
||
# Copyright (C) 2004-2019 by
|
||
# Aric Hagberg <hagberg@lanl.gov>
|
||
# Dan Schult <dschult@colgate.edu>
|
||
# Pieter Swart <swart@lanl.gov>
|
||
# All rights reserved.
|
||
# BSD license.
|
||
#
|
||
# Author: Aric Hagberg (hagberg@lanl.gov)
|
||
"""Betweenness centrality measures."""
|
||
from heapq import heappush, heappop
|
||
from itertools import count
|
||
|
||
import networkx as nx
|
||
from networkx.utils import py_random_state
|
||
from networkx.utils.decorators import not_implemented_for
|
||
|
||
__all__ = ['betweenness_centrality', 'edge_betweenness_centrality',
|
||
'edge_betweenness']
|
||
|
||
|
||
@py_random_state(5)
|
||
@not_implemented_for('multigraph')
|
||
def betweenness_centrality(G, k=None, normalized=True, weight=None,
|
||
endpoints=False, seed=None):
|
||
r"""Compute the shortest-path betweenness centrality for nodes.
|
||
|
||
Betweenness centrality of a node $v$ is the sum of the
|
||
fraction of all-pairs shortest paths that pass through $v$
|
||
|
||
.. math::
|
||
|
||
c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
|
||
|
||
where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
|
||
shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of
|
||
those paths passing through some node $v$ other than $s, t$.
|
||
If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,
|
||
$\sigma(s, t|v) = 0$ [2]_.
|
||
|
||
Parameters
|
||
----------
|
||
G : graph
|
||
A NetworkX graph.
|
||
|
||
k : int, optional (default=None)
|
||
If k is not None use k node samples to estimate betweenness.
|
||
The value of k <= n where n is the number of nodes in the graph.
|
||
Higher values give better approximation.
|
||
|
||
normalized : bool, optional
|
||
If True the betweenness values are normalized by `2/((n-1)(n-2))`
|
||
for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
|
||
is the number of nodes in G.
|
||
|
||
weight : None or string, optional (default=None)
|
||
If None, all edge weights are considered equal.
|
||
Otherwise holds the name of the edge attribute used as weight.
|
||
|
||
endpoints : bool, optional
|
||
If True include the endpoints in the shortest path counts.
|
||
|
||
seed : integer, random_state, or None (default)
|
||
Indicator of random number generation state.
|
||
See :ref:`Randomness<randomness>`.
|
||
Note that this is only used if k is not None.
|
||
|
||
Returns
|
||
-------
|
||
nodes : dictionary
|
||
Dictionary of nodes with betweenness centrality as the value.
|
||
|
||
See Also
|
||
--------
|
||
edge_betweenness_centrality
|
||
load_centrality
|
||
|
||
Notes
|
||
-----
|
||
The algorithm is from Ulrik Brandes [1]_.
|
||
See [4]_ for the original first published version and [2]_ for details on
|
||
algorithms for variations and related metrics.
|
||
|
||
For approximate betweenness calculations set k=#samples to use
|
||
k nodes ("pivots") to estimate the betweenness values. For an estimate
|
||
of the number of pivots needed see [3]_.
|
||
|
||
For weighted graphs the edge weights must be greater than zero.
|
||
Zero edge weights can produce an infinite number of equal length
|
||
paths between pairs of nodes.
|
||
|
||
The total number of paths between source and target is counted
|
||
differently for directed and undirected graphs. Directed paths
|
||
are easy to count. Undirected paths are tricky: should a path
|
||
from "u" to "v" count as 1 undirected path or as 2 directed paths?
|
||
|
||
For betweenness_centrality we report the number of undirected
|
||
paths when G is undirected.
|
||
|
||
For betweenness_centrality_subset the reporting is different.
|
||
If the source and target subsets are the same, then we want
|
||
to count undirected paths. But if the source and target subsets
|
||
differ -- for example, if sources is {0} and targets is {1},
|
||
then we are only counting the paths in one direction. They are
|
||
undirected paths but we are counting them in a directed way.
|
||
To count them as undirected paths, each should count as half a path.
|
||
|
||
References
|
||
----------
|
||
.. [1] Ulrik Brandes:
|
||
A Faster Algorithm for Betweenness Centrality.
|
||
Journal of Mathematical Sociology 25(2):163-177, 2001.
|
||
http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
|
||
.. [2] Ulrik Brandes:
|
||
On Variants of Shortest-Path Betweenness
|
||
Centrality and their Generic Computation.
|
||
Social Networks 30(2):136-145, 2008.
|
||
http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
|
||
.. [3] Ulrik Brandes and Christian Pich:
|
||
Centrality Estimation in Large Networks.
|
||
International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
|
||
http://www.inf.uni-konstanz.de/algo/publications/bp-celn-06.pdf
|
||
.. [4] Linton C. Freeman:
|
||
A set of measures of centrality based on betweenness.
|
||
Sociometry 40: 35–41, 1977
|
||
http://moreno.ss.uci.edu/23.pdf
|
||
"""
|
||
betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
|
||
if k is None:
|
||
nodes = G
|
||
else:
|
||
nodes = seed.sample(G.nodes(), k)
|
||
for s in nodes:
|
||
# single source shortest paths
|
||
if weight is None: # use BFS
|
||
S, P, sigma = _single_source_shortest_path_basic(G, s)
|
||
else: # use Dijkstra's algorithm
|
||
S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
|
||
# accumulation
|
||
if endpoints:
|
||
betweenness = _accumulate_endpoints(betweenness, S, P, sigma, s)
|
||
else:
|
||
betweenness = _accumulate_basic(betweenness, S, P, sigma, s)
|
||
# rescaling
|
||
betweenness = _rescale(betweenness, len(G), normalized=normalized,
|
||
directed=G.is_directed(), k=k, endpoints=endpoints)
|
||
return betweenness
|
||
|
||
|
||
@py_random_state(4)
|
||
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None,
|
||
seed=None):
|
||
r"""Compute betweenness centrality for edges.
|
||
|
||
Betweenness centrality of an edge $e$ is the sum of the
|
||
fraction of all-pairs shortest paths that pass through $e$
|
||
|
||
.. math::
|
||
|
||
c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}
|
||
|
||
where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
|
||
shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of
|
||
those paths passing through edge $e$ [2]_.
|
||
|
||
Parameters
|
||
----------
|
||
G : graph
|
||
A NetworkX graph.
|
||
|
||
k : int, optional (default=None)
|
||
If k is not None use k node samples to estimate betweenness.
|
||
The value of k <= n where n is the number of nodes in the graph.
|
||
Higher values give better approximation.
|
||
|
||
normalized : bool, optional
|
||
If True the betweenness values are normalized by $2/(n(n-1))$
|
||
for graphs, and $1/(n(n-1))$ for directed graphs where $n$
|
||
is the number of nodes in G.
|
||
|
||
weight : None or string, optional (default=None)
|
||
If None, all edge weights are considered equal.
|
||
Otherwise holds the name of the edge attribute used as weight.
|
||
|
||
seed : integer, random_state, or None (default)
|
||
Indicator of random number generation state.
|
||
See :ref:`Randomness<randomness>`.
|
||
Note that this is only used if k is not None.
|
||
|
||
Returns
|
||
-------
|
||
edges : dictionary
|
||
Dictionary of edges with betweenness centrality as the value.
|
||
|
||
See Also
|
||
--------
|
||
betweenness_centrality
|
||
edge_load
|
||
|
||
Notes
|
||
-----
|
||
The algorithm is from Ulrik Brandes [1]_.
|
||
|
||
For weighted graphs the edge weights must be greater than zero.
|
||
Zero edge weights can produce an infinite number of equal length
|
||
paths between pairs of nodes.
|
||
|
||
References
|
||
----------
|
||
.. [1] A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,
|
||
Journal of Mathematical Sociology 25(2):163-177, 2001.
|
||
http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf
|
||
.. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
|
||
Centrality and their Generic Computation.
|
||
Social Networks 30(2):136-145, 2008.
|
||
http://www.inf.uni-konstanz.de/algo/publications/b-vspbc-08.pdf
|
||
"""
|
||
betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
|
||
# b[e]=0 for e in G.edges()
|
||
betweenness.update(dict.fromkeys(G.edges(), 0.0))
|
||
if k is None:
|
||
nodes = G
|
||
else:
|
||
nodes = seed.sample(G.nodes(), k)
|
||
for s in nodes:
|
||
# single source shortest paths
|
||
if weight is None: # use BFS
|
||
S, P, sigma = _single_source_shortest_path_basic(G, s)
|
||
else: # use Dijkstra's algorithm
|
||
S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
|
||
# accumulation
|
||
betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
|
||
# rescaling
|
||
for n in G: # remove nodes to only return edges
|
||
del betweenness[n]
|
||
betweenness = _rescale_e(betweenness, len(G), normalized=normalized,
|
||
directed=G.is_directed())
|
||
return betweenness
|
||
|
||
# obsolete name
|
||
|
||
|
||
def edge_betweenness(G, k=None, normalized=True, weight=None, seed=None):
|
||
return edge_betweenness_centrality(G, k, normalized, weight, seed)
|
||
|
||
|
||
# helpers for betweenness centrality
|
||
|
||
def _single_source_shortest_path_basic(G, s):
|
||
S = []
|
||
P = {}
|
||
for v in G:
|
||
P[v] = []
|
||
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
|
||
D = {}
|
||
sigma[s] = 1.0
|
||
D[s] = 0
|
||
Q = [s]
|
||
while Q: # use BFS to find shortest paths
|
||
v = Q.pop(0)
|
||
S.append(v)
|
||
Dv = D[v]
|
||
sigmav = sigma[v]
|
||
for w in G[v]:
|
||
if w not in D:
|
||
Q.append(w)
|
||
D[w] = Dv + 1
|
||
if D[w] == Dv + 1: # this is a shortest path, count paths
|
||
sigma[w] += sigmav
|
||
P[w].append(v) # predecessors
|
||
return S, P, sigma
|
||
|
||
|
||
def _single_source_dijkstra_path_basic(G, s, weight):
|
||
# modified from Eppstein
|
||
S = []
|
||
P = {}
|
||
for v in G:
|
||
P[v] = []
|
||
sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
|
||
D = {}
|
||
sigma[s] = 1.0
|
||
push = heappush
|
||
pop = heappop
|
||
seen = {s: 0}
|
||
c = count()
|
||
Q = [] # use Q as heap with (distance,node id) tuples
|
||
push(Q, (0, next(c), s, s))
|
||
while Q:
|
||
(dist, _, pred, v) = pop(Q)
|
||
if v in D:
|
||
continue # already searched this node.
|
||
sigma[v] += sigma[pred] # count paths
|
||
S.append(v)
|
||
D[v] = dist
|
||
for w, edgedata in G[v].items():
|
||
vw_dist = dist + edgedata.get(weight, 1)
|
||
if w not in D and (w not in seen or vw_dist < seen[w]):
|
||
seen[w] = vw_dist
|
||
push(Q, (vw_dist, next(c), v, w))
|
||
sigma[w] = 0.0
|
||
P[w] = [v]
|
||
elif vw_dist == seen[w]: # handle equal paths
|
||
sigma[w] += sigma[v]
|
||
P[w].append(v)
|
||
return S, P, sigma
|
||
|
||
|
||
def _accumulate_basic(betweenness, S, P, sigma, s):
|
||
delta = dict.fromkeys(S, 0)
|
||
while S:
|
||
w = S.pop()
|
||
coeff = (1 + delta[w]) / sigma[w]
|
||
for v in P[w]:
|
||
delta[v] += sigma[v] * coeff
|
||
if w != s:
|
||
betweenness[w] += delta[w]
|
||
return betweenness
|
||
|
||
|
||
def _accumulate_endpoints(betweenness, S, P, sigma, s):
|
||
betweenness[s] += len(S) - 1
|
||
delta = dict.fromkeys(S, 0)
|
||
while S:
|
||
w = S.pop()
|
||
coeff = (1 + delta[w]) / sigma[w]
|
||
for v in P[w]:
|
||
delta[v] += sigma[v] * coeff
|
||
if w != s:
|
||
betweenness[w] += delta[w] + 1
|
||
return betweenness
|
||
|
||
|
||
def _accumulate_edges(betweenness, S, P, sigma, s):
|
||
delta = dict.fromkeys(S, 0)
|
||
while S:
|
||
w = S.pop()
|
||
coeff = (1 + delta[w]) / sigma[w]
|
||
for v in P[w]:
|
||
c = sigma[v] * coeff
|
||
if (v, w) not in betweenness:
|
||
betweenness[(w, v)] += c
|
||
else:
|
||
betweenness[(v, w)] += c
|
||
delta[v] += c
|
||
if w != s:
|
||
betweenness[w] += delta[w]
|
||
return betweenness
|
||
|
||
|
||
def _rescale(betweenness, n, normalized,
|
||
directed=False, k=None, endpoints=False):
|
||
if normalized:
|
||
if endpoints:
|
||
if n < 2:
|
||
scale = None # no normalization
|
||
else:
|
||
# Scale factor should include endpoint nodes
|
||
scale = 1 / (n * (n - 1))
|
||
elif n <= 2:
|
||
scale = None # no normalization b=0 for all nodes
|
||
else:
|
||
scale = 1 / ((n - 1) * (n - 2))
|
||
else: # rescale by 2 for undirected graphs
|
||
if not directed:
|
||
scale = 0.5
|
||
else:
|
||
scale = None
|
||
if scale is not None:
|
||
if k is not None:
|
||
scale = scale * n / k
|
||
for v in betweenness:
|
||
betweenness[v] *= scale
|
||
return betweenness
|
||
|
||
|
||
def _rescale_e(betweenness, n, normalized, directed=False, k=None):
|
||
if normalized:
|
||
if n <= 1:
|
||
scale = None # no normalization b=0 for all nodes
|
||
else:
|
||
scale = 1 / (n * (n - 1))
|
||
else: # rescale by 2 for undirected graphs
|
||
if not directed:
|
||
scale = 0.5
|
||
else:
|
||
scale = None
|
||
if scale is not None:
|
||
if k is not None:
|
||
scale = scale * n / k
|
||
for v in betweenness:
|
||
betweenness[v] *= scale
|
||
return betweenness
|