366 lines
13 KiB
Python
366 lines
13 KiB
Python
|
# Copyright (C) 2010-2019 by
|
||
|
# Aric Hagberg <hagberg@lanl.gov>
|
||
|
# Dan Schult <dschult@colgate.edu>
|
||
|
# Pieter Swart <swart@lanl.gov>
|
||
|
# All rights reserved.
|
||
|
# BSD license.
|
||
|
#
|
||
|
# Author: Aric Hagberg (hagberg@lanl.gov)
|
||
|
"""Current-flow betweenness centrality measures."""
|
||
|
import networkx as nx
|
||
|
from networkx.algorithms.centrality.flow_matrix import *
|
||
|
from networkx.utils import (not_implemented_for,
|
||
|
reverse_cuthill_mckee_ordering,
|
||
|
py_random_state)
|
||
|
|
||
|
__all__ = ['current_flow_betweenness_centrality',
|
||
|
'approximate_current_flow_betweenness_centrality',
|
||
|
'edge_current_flow_betweenness_centrality']
|
||
|
|
||
|
|
||
|
@py_random_state(7)
|
||
|
@not_implemented_for('directed')
|
||
|
def approximate_current_flow_betweenness_centrality(G, normalized=True,
|
||
|
weight=None,
|
||
|
dtype=float, solver='full',
|
||
|
epsilon=0.5, kmax=10000,
|
||
|
seed=None):
|
||
|
r"""Compute the approximate current-flow betweenness centrality for nodes.
|
||
|
|
||
|
Approximates the current-flow betweenness centrality within absolute
|
||
|
error of epsilon with high probability [1]_.
|
||
|
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
G : graph
|
||
|
A NetworkX graph
|
||
|
|
||
|
normalized : bool, optional (default=True)
|
||
|
If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
|
||
|
n is the number of nodes in G.
|
||
|
|
||
|
weight : string or None, optional (default=None)
|
||
|
Key for edge data used as the edge weight.
|
||
|
If None, then use 1 as each edge weight.
|
||
|
|
||
|
dtype : data type (float)
|
||
|
Default data type for internal matrices.
|
||
|
Set to np.float32 for lower memory consumption.
|
||
|
|
||
|
solver : string (default='lu')
|
||
|
Type of linear solver to use for computing the flow matrix.
|
||
|
Options are "full" (uses most memory), "lu" (recommended), and
|
||
|
"cg" (uses least memory).
|
||
|
|
||
|
epsilon: float
|
||
|
Absolute error tolerance.
|
||
|
|
||
|
kmax: int
|
||
|
Maximum number of sample node pairs to use for approximation.
|
||
|
|
||
|
seed : integer, random_state, or None (default)
|
||
|
Indicator of random number generation state.
|
||
|
See :ref:`Randomness<randomness>`.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
nodes : dictionary
|
||
|
Dictionary of nodes with betweenness centrality as the value.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
current_flow_betweenness_centrality
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$
|
||
|
and the space required is $O(m)$ for $n$ nodes and $m$ edges.
|
||
|
|
||
|
If the edges have a 'weight' attribute they will be used as
|
||
|
weights in this algorithm. Unspecified weights are set to 1.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] Ulrik Brandes and Daniel Fleischer:
|
||
|
Centrality Measures Based on Current Flow.
|
||
|
Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
|
||
|
LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
|
||
|
http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
|
||
|
"""
|
||
|
try:
|
||
|
import numpy as np
|
||
|
except ImportError:
|
||
|
raise ImportError('current_flow_betweenness_centrality requires NumPy ',
|
||
|
'http://scipy.org/')
|
||
|
try:
|
||
|
from scipy import sparse
|
||
|
from scipy.sparse import linalg
|
||
|
except ImportError:
|
||
|
raise ImportError('current_flow_betweenness_centrality requires SciPy ',
|
||
|
'http://scipy.org/')
|
||
|
if not nx.is_connected(G):
|
||
|
raise nx.NetworkXError("Graph not connected.")
|
||
|
solvername = {"full": FullInverseLaplacian,
|
||
|
"lu": SuperLUInverseLaplacian,
|
||
|
"cg": CGInverseLaplacian}
|
||
|
n = G.number_of_nodes()
|
||
|
ordering = list(reverse_cuthill_mckee_ordering(G))
|
||
|
# make a copy with integer labels according to rcm ordering
|
||
|
# this could be done without a copy if we really wanted to
|
||
|
H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
|
||
|
L = laplacian_sparse_matrix(H, nodelist=range(n), weight=weight,
|
||
|
dtype=dtype, format='csc')
|
||
|
C = solvername[solver](L, dtype=dtype) # initialize solver
|
||
|
betweenness = dict.fromkeys(H, 0.0)
|
||
|
nb = (n - 1.0) * (n - 2.0) # normalization factor
|
||
|
cstar = n * (n - 1) / nb
|
||
|
l = 1 # parameter in approximation, adjustable
|
||
|
k = l * int(np.ceil((cstar / epsilon)**2 * np.log(n)))
|
||
|
if k > kmax:
|
||
|
msg = 'Number random pairs k>kmax (%d>%d) ' % (k, kmax)
|
||
|
raise nx.NetworkXError(msg, 'Increase kmax or epsilon')
|
||
|
cstar2k = cstar / (2 * k)
|
||
|
for i in range(k):
|
||
|
s, t = seed.sample(range(n), 2)
|
||
|
b = np.zeros(n, dtype=dtype)
|
||
|
b[s] = 1
|
||
|
b[t] = -1
|
||
|
p = C.solve(b)
|
||
|
for v in H:
|
||
|
if v == s or v == t:
|
||
|
continue
|
||
|
for nbr in H[v]:
|
||
|
w = H[v][nbr].get(weight, 1.0)
|
||
|
betweenness[v] += w * np.abs(p[v] - p[nbr]) * cstar2k
|
||
|
if normalized:
|
||
|
factor = 1.0
|
||
|
else:
|
||
|
factor = nb / 2.0
|
||
|
# remap to original node names and "unnormalize" if required
|
||
|
return dict((ordering[k], float(v * factor)) for k, v in betweenness.items())
|
||
|
|
||
|
|
||
|
@not_implemented_for('directed')
|
||
|
def current_flow_betweenness_centrality(G, normalized=True, weight=None,
|
||
|
dtype=float, solver='full'):
|
||
|
r"""Compute current-flow betweenness centrality for nodes.
|
||
|
|
||
|
Current-flow betweenness centrality uses an electrical current
|
||
|
model for information spreading in contrast to betweenness
|
||
|
centrality which uses shortest paths.
|
||
|
|
||
|
Current-flow betweenness centrality is also known as
|
||
|
random-walk betweenness centrality [2]_.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
G : graph
|
||
|
A NetworkX graph
|
||
|
|
||
|
normalized : bool, optional (default=True)
|
||
|
If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
|
||
|
n is the number of nodes in G.
|
||
|
|
||
|
weight : string or None, optional (default=None)
|
||
|
Key for edge data used as the edge weight.
|
||
|
If None, then use 1 as each edge weight.
|
||
|
|
||
|
dtype : data type (float)
|
||
|
Default data type for internal matrices.
|
||
|
Set to np.float32 for lower memory consumption.
|
||
|
|
||
|
solver : string (default='lu')
|
||
|
Type of linear solver to use for computing the flow matrix.
|
||
|
Options are "full" (uses most memory), "lu" (recommended), and
|
||
|
"cg" (uses least memory).
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
nodes : dictionary
|
||
|
Dictionary of nodes with betweenness centrality as the value.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
approximate_current_flow_betweenness_centrality
|
||
|
betweenness_centrality
|
||
|
edge_betweenness_centrality
|
||
|
edge_current_flow_betweenness_centrality
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
|
||
|
time [1]_, where $I(n-1)$ is the time needed to compute the
|
||
|
inverse Laplacian. For a full matrix this is $O(n^3)$ but using
|
||
|
sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
|
||
|
Laplacian matrix condition number.
|
||
|
|
||
|
The space required is $O(nw)$ where $w$ is the width of the sparse
|
||
|
Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.
|
||
|
|
||
|
If the edges have a 'weight' attribute they will be used as
|
||
|
weights in this algorithm. Unspecified weights are set to 1.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] Centrality Measures Based on Current Flow.
|
||
|
Ulrik Brandes and Daniel Fleischer,
|
||
|
Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
|
||
|
LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
|
||
|
http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
|
||
|
|
||
|
.. [2] A measure of betweenness centrality based on random walks,
|
||
|
M. E. J. Newman, Social Networks 27, 39-54 (2005).
|
||
|
"""
|
||
|
try:
|
||
|
import numpy as np
|
||
|
except ImportError:
|
||
|
raise ImportError('current_flow_betweenness_centrality requires NumPy ',
|
||
|
'http://scipy.org/')
|
||
|
try:
|
||
|
import scipy
|
||
|
except ImportError:
|
||
|
raise ImportError('current_flow_betweenness_centrality requires SciPy ',
|
||
|
'http://scipy.org/')
|
||
|
if not nx.is_connected(G):
|
||
|
raise nx.NetworkXError("Graph not connected.")
|
||
|
n = G.number_of_nodes()
|
||
|
ordering = list(reverse_cuthill_mckee_ordering(G))
|
||
|
# make a copy with integer labels according to rcm ordering
|
||
|
# this could be done without a copy if we really wanted to
|
||
|
H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
|
||
|
betweenness = dict.fromkeys(H, 0.0) # b[v]=0 for v in H
|
||
|
for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype,
|
||
|
solver=solver):
|
||
|
pos = dict(zip(row.argsort()[::-1], range(n)))
|
||
|
for i in range(n):
|
||
|
betweenness[s] += (i - pos[i]) * row[i]
|
||
|
betweenness[t] += (n - i - 1 - pos[i]) * row[i]
|
||
|
if normalized:
|
||
|
nb = (n - 1.0) * (n - 2.0) # normalization factor
|
||
|
else:
|
||
|
nb = 2.0
|
||
|
for v in H:
|
||
|
betweenness[v] = float((betweenness[v] - v) * 2.0 / nb)
|
||
|
return dict((ordering[k], v) for k, v in betweenness.items())
|
||
|
|
||
|
|
||
|
@not_implemented_for('directed')
|
||
|
def edge_current_flow_betweenness_centrality(G, normalized=True,
|
||
|
weight=None,
|
||
|
dtype=float, solver='full'):
|
||
|
r"""Compute current-flow betweenness centrality for edges.
|
||
|
|
||
|
Current-flow betweenness centrality uses an electrical current
|
||
|
model for information spreading in contrast to betweenness
|
||
|
centrality which uses shortest paths.
|
||
|
|
||
|
Current-flow betweenness centrality is also known as
|
||
|
random-walk betweenness centrality [2]_.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
G : graph
|
||
|
A NetworkX graph
|
||
|
|
||
|
normalized : bool, optional (default=True)
|
||
|
If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
|
||
|
n is the number of nodes in G.
|
||
|
|
||
|
weight : string or None, optional (default=None)
|
||
|
Key for edge data used as the edge weight.
|
||
|
If None, then use 1 as each edge weight.
|
||
|
|
||
|
dtype : data type (default=float)
|
||
|
Default data type for internal matrices.
|
||
|
Set to np.float32 for lower memory consumption.
|
||
|
|
||
|
solver : string (default='lu')
|
||
|
Type of linear solver to use for computing the flow matrix.
|
||
|
Options are "full" (uses most memory), "lu" (recommended), and
|
||
|
"cg" (uses least memory).
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
nodes : dictionary
|
||
|
Dictionary of edge tuples with betweenness centrality as the value.
|
||
|
|
||
|
Raises
|
||
|
------
|
||
|
NetworkXError
|
||
|
The algorithm does not support DiGraphs.
|
||
|
If the input graph is an instance of DiGraph class, NetworkXError
|
||
|
is raised.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
betweenness_centrality
|
||
|
edge_betweenness_centrality
|
||
|
current_flow_betweenness_centrality
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
|
||
|
time [1]_, where $I(n-1)$ is the time needed to compute the
|
||
|
inverse Laplacian. For a full matrix this is $O(n^3)$ but using
|
||
|
sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
|
||
|
Laplacian matrix condition number.
|
||
|
|
||
|
The space required is $O(nw)$ where $w$ is the width of the sparse
|
||
|
Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.
|
||
|
|
||
|
If the edges have a 'weight' attribute they will be used as
|
||
|
weights in this algorithm. Unspecified weights are set to 1.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] Centrality Measures Based on Current Flow.
|
||
|
Ulrik Brandes and Daniel Fleischer,
|
||
|
Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
|
||
|
LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
|
||
|
http://algo.uni-konstanz.de/publications/bf-cmbcf-05.pdf
|
||
|
|
||
|
.. [2] A measure of betweenness centrality based on random walks,
|
||
|
M. E. J. Newman, Social Networks 27, 39-54 (2005).
|
||
|
"""
|
||
|
from networkx.utils import reverse_cuthill_mckee_ordering
|
||
|
try:
|
||
|
import numpy as np
|
||
|
except ImportError:
|
||
|
raise ImportError('current_flow_betweenness_centrality requires NumPy ',
|
||
|
'http://scipy.org/')
|
||
|
try:
|
||
|
import scipy
|
||
|
except ImportError:
|
||
|
raise ImportError('current_flow_betweenness_centrality requires SciPy ',
|
||
|
'http://scipy.org/')
|
||
|
if not nx.is_connected(G):
|
||
|
raise nx.NetworkXError("Graph not connected.")
|
||
|
n = G.number_of_nodes()
|
||
|
ordering = list(reverse_cuthill_mckee_ordering(G))
|
||
|
# make a copy with integer labels according to rcm ordering
|
||
|
# this could be done without a copy if we really wanted to
|
||
|
H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
|
||
|
edges = (tuple(sorted((u, v))) for u, v in H.edges())
|
||
|
betweenness = dict.fromkeys(edges, 0.0)
|
||
|
if normalized:
|
||
|
nb = (n - 1.0) * (n - 2.0) # normalization factor
|
||
|
else:
|
||
|
nb = 2.0
|
||
|
for row, (e) in flow_matrix_row(H, weight=weight, dtype=dtype,
|
||
|
solver=solver):
|
||
|
pos = dict(zip(row.argsort()[::-1], range(1, n + 1)))
|
||
|
for i in range(n):
|
||
|
betweenness[e] += (i + 1 - pos[i]) * row[i]
|
||
|
betweenness[e] += (n - i - pos[i]) * row[i]
|
||
|
betweenness[e] /= nb
|
||
|
return dict(((ordering[s], ordering[t]), float(v))
|
||
|
for (s, t), v in betweenness.items())
|
||
|
|
||
|
|
||
|
# fixture for pytest
|
||
|
def setup_module(module):
|
||
|
import pytest
|
||
|
numpy = pytest.importorskip('numpy')
|
||
|
scipy = pytest.importorskip('scipy')
|