mightyscape-1.1-deprecated/extensions/fablabchemnitz/networkx/algorithms/connectivity/kcomponents.py

# -*- coding: utf-8 -*-
"""
Moody and White algorithm for k-components
"""
from collections import defaultdict
from itertools import combinations
from operator import itemgetter

import networkx as nx
from networkx.utils import not_implemented_for
# Define the default maximum flow function.
from networkx.algorithms.flow import edmonds_karp
default_flow_func = edmonds_karp

__author__ = '\n'.join(['Jordi Torrents <jtorrents@milnou.net>'])

__all__ = ['k_components']


@not_implemented_for('directed')
def k_components(G, flow_func=None):
    r"""Compute the k-component structure of the undirected graph G.

    A `k`-component is a maximal subgraph of G whose node connectivity is
    at least `k`, i.e. at least `k` nodes have to be removed to split it
    into more components. Because higher connectivity implies lower
    connectivity, `k`-components are nested hierarchically: a connected
    graph may hold several 2-components, each of them may hold one or
    more 3-components, and so on.

    Parameters
    ----------
    G : NetworkX graph

    flow_func : function
        Function used for the underlying flow computations. When it is
        None the module default, :meth:`edmonds_karp`, is used; it performs
        better in sparse graphs with right tailed degree distributions.
        :meth:`shortest_augmenting_path` will perform better in denser
        graphs.

    Returns
    -------
    k_components : dict
        Dictionary keyed by every connectivity level `k` present in the
        input graph; each value is the list of node sets that form a
        k-component of level `k`.

    Raises
    ------
    NetworkXNotImplemented:
        If the input graph is directed.

    Examples
    --------
    >>> # Petersen graph has 10 nodes and it is triconnected, thus all
    >>> # nodes are in a single component on all three connectivity levels
    >>> G = nx.petersen_graph()
    >>> k_components = nx.k_components(G)

    Notes
    -----
    The procedure follows Moody and White [1]_ (appendix A), which relies
    on Kanevsky's algorithm [2]_ for enumerating all minimum-size node
    cut-sets of a graph (available as the :meth:`all_node_cuts` function):

        1. Compute node connectivity, k, of the input graph G.

        2. Identify all k-cutsets at the current level of connectivity using
           Kanevsky's algorithm.

        3. Generate new graph components based on the removal of
           these cutsets. Nodes in a cutset belong to both sides
           of the induced cut.

        4. If the graph is neither complete nor trivial, return to 1;
           else end.

    Some heuristics (described in [3]_) are applied on top of this scheme
    to speed up the computation.

    See also
    --------
    node_connectivity
    all_node_cuts
    biconnected_components : special case of this function when k=2
    k_edge_components : similar to this function, but uses edge-connectivity
        instead of node-connectivity

    References
    ----------
    .. [1]  Moody, J. and D. White (2003). Social cohesion and embeddedness:
            A hierarchical conception of social groups.
            American Sociological Review 68(1), 103--28.
            http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf

    .. [2]  Kanevsky, A. (1993). Finding all minimum-size separating vertex
            sets in a graph. Networks 23(6), 533--541.
            http://onlinelibrary.wiley.com/doi/10.1002/net.3230230604/abstract

    .. [3]  Torrents, J. and F. Ferraro (2015). Structural Cohesion:
            Visualization and Heuristics for Fast Computation.
            https://arxiv.org/pdf/1503.04476v1

    """
    # Fall back to the module-level default flow function.
    if flow_func is None:
        flow_func = default_flow_func
    # Connectivity level k -> list of node sets detected at that level.
    # Note that k-components can overlap (but only in k - 1 nodes).
    levels = defaultdict(list)
    # Level 1: connected components with more than one node (isolated
    # nodes have connectivity 0 and are left out).
    for component in nx.connected_components(G):
        members = set(component)
        if len(members) > 1:
            levels[1].append(members)
    # Level 2: biconnected components, which are also the starting point
    # for the search of higher order k-components. Dyads are not
    # considered bicomponents.
    bicomponents = [G.subgraph(c) for c in nx.biconnected_components(G)]
    candidates = [B for B in bicomponents if len(B) > 2]
    for B in candidates:
        levels[2].append(set(B))
    for B in candidates:
        k = nx.node_connectivity(B, flow_func=flow_func)
        if k > 2:
            levels[k].append(set(B))
        # Explore the partitions induced by the cuts in a DFS-like order:
        # every stack entry is (connectivity of the parent, generator of
        # the node sets still to be examined under that parent).
        cuts = list(nx.all_node_cuts(B, k=k, flow_func=flow_func))
        stack = [(k, _generate_partition(B, cuts, k))]
        while stack:
            parent_k, partition = stack[-1]
            try:
                nodes = next(partition)
            except StopIteration:
                # This parent is exhausted; resume with the one below it.
                stack.pop()
                continue
            C = B.subgraph(nodes)
            this_k = nx.node_connectivity(C, flow_func=flow_func)
            # Only record subgraphs that improve on their parent and are
            # above the bicomponent level already stored.
            if this_k > parent_k and this_k > 2:
                levels[this_k].append(set(C))
            cuts = list(nx.all_node_cuts(C, k=this_k, flow_func=flow_func))
            if cuts:
                stack.append((this_k, _generate_partition(C, cuts, this_k)))

    # A k-component may have been recorded only at its maximum k level,
    # while the returned dictionary must list components for every
    # connectivity level without gaps. Subsets of an already detected
    # k-component can also show up at a level k; filtering them inside the
    # loop above would penalize the common case, so all levels are
    # consolidated afterwards in _reconstruct_k_components.
    return _reconstruct_k_components(levels)


def _consolidate(sets, k):
    """Yield unions of the input sets that overlap in at least k elements.

    Two sets are linked when their intersection has `k` or more elements;
    every group of sets connected through such links (directly or through
    a chain of them) is merged into a single set, which is yielded.

    See: http://rosettacode.org/wiki/Set_consolidation

    The iterative python implementation posted there is faster than
    this one, which pays the overhead of building a Graph and calling
    nx.connected_components, but it is not clear whether it can be used
    in NetworkX because there is no licence for that code.

    """
    # Give every input set an integer id; the ids are the graph nodes.
    indexed = dict(enumerate(sets))
    overlap_graph = nx.Graph()
    overlap_graph.add_nodes_from(indexed)
    # Link each pair of sets that shares at least k elements.
    for a, b in combinations(indexed, 2):
        if len(indexed[a] & indexed[b]) >= k:
            overlap_graph.add_edge(a, b)
    # Each connected group of ids collapses into the union of its sets.
    for group in nx.connected_components(overlap_graph):
        yield set.union(*(indexed[idx] for idx in group))


def _generate_partition(G, cuts, k):
    """Yield the node sets of G to examine after removing the given cuts.

    Nodes with degree greater than `k` that belong to no cut are grouped
    by the connected components of the subgraph they induce. Each group
    is extended with every cut node that has a neighbor (in G) inside it,
    groups that end up covering all of G are discarded, and the remaining
    ones are merged with ``_consolidate`` using ``k + 1`` as the overlap
    threshold.
    """
    high_degree = {n for n, d in G.degree() if d > k}
    in_some_cut = {n for cut in cuts for n in cut}
    core = G.subgraph(high_degree - in_some_cut)
    total = G.order()
    candidates = []
    for cc in nx.connected_components(core):
        candidate = set(cc)
        # Cut nodes belong to every side they are adjacent to.
        candidate.update(
            node for cut in cuts for node in cut
            if any(nbr in cc for nbr in G[node])
        )
        # A candidate spanning the whole graph is not a proper partition.
        if len(candidate) < total:
            candidates.append(candidate)
    for merged in _consolidate(candidates, k + 1):
        yield merged


def _reconstruct_k_components(k_comps):
    result = dict()
    max_k = max(k_comps)
    for k in reversed(range(1, max_k + 1)):
        if k == max_k:
            result[k] = list(_consolidate(k_comps[k], k))
        elif k not in k_comps:
            result[k] = list(_consolidate(result[k + 1], k))
        else:
            nodes_at_k = set.union(*k_comps[k])
            to_add = [c for c in result[k + 1] if any(n not in nodes_at_k for n in c)]
            if to_add:
                result[k] = list(_consolidate(k_comps[k] + to_add, k))
            else:
                result[k] = list(_consolidate(k_comps[k], k))
    return result


def build_k_number_dict(kcomps):
    """Map every node to the highest level k at which it appears.

    `kcomps` maps a level `k` to an iterable of node collections. Levels
    are visited in increasing order, so a node that appears at several
    levels ends up with the largest one.
    """
    by_level = sorted(kcomps.items(), key=itemgetter(0))
    return {
        node: k
        for k, comps in by_level
        for comp in comps
        for node in comp
    }
Initial commit 2020-07-30 01:16:18 +02:00			`# -- coding: utf-8 --`
			`"""`
			`Moody and White algorithm for k-components`
			`"""`
			`from collections import defaultdict`
			`from itertools import combinations`
			`from operator import itemgetter`

			`import networkx as nx`
			`from networkx.utils import not_implemented_for`
			`# Define the default maximum flow function.`
			`from networkx.algorithms.flow import edmonds_karp`
			`default_flow_func = edmonds_karp`

			`__author__ = '\n'.join(['Jordi Torrents <jtorrents@milnou.net>'])`

			`__all__ = ['k_components']`


			`@not_implemented_for('directed')`
			`def k_components(G, flow_func=None):`
			`r"""Returns the k-component structure of a graph G.`

			A `k`-component is a maximal subgraph of a graph G that has, at least,
			node connectivity `k`: we need to remove at least `k` nodes to break it
			into more components. `k`-components have an inherent hierarchical
			`structure because they are nested in terms of connectivity: a connected`
			`graph can contain several 2-components, each of which can contain`
			`one or more 3-components, and so forth.`

			`Parameters`
			`----------`
			`G : NetworkX graph`

			`flow_func : function`
			`Function to perform the underlying flow computations. Default value`
			:meth:`edmonds_karp`. This function performs better in sparse graphs with
			right tailed degree distributions. :meth:`shortest_augmenting_path` will
			`perform better in denser graphs.`

			`Returns`
			`-------`
			`k_components : dict`
			Dictionary with all connectivity levels `k` in the input Graph as keys
			and a list of sets of nodes that form a k-component of level `k` as
			`values.`

			`Raises`
			`------`
			`NetworkXNotImplemented:`
			`If the input graph is directed.`

			`Examples`
			`--------`
			`>>> # Petersen graph has 10 nodes and it is triconnected, thus all`
			`>>> # nodes are in a single component on all three connectivity levels`
			`>>> G = nx.petersen_graph()`
			`>>> k_components = nx.k_components(G)`

			`Notes`
			`-----`
			`Moody and White [1]_ (appendix A) provide an algorithm for identifying`
			`k-components in a graph, which is based on Kanevsky's algorithm [2]_`
			`for finding all minimum-size node cut-sets of a graph (implemented in`
			:meth:`all_node_cuts` function):

			`1. Compute node connectivity, k, of the input graph G.`

			`2. Identify all k-cutsets at the current level of connectivity using`
			`Kanevsky's algorithm.`

			`3. Generate new graph components based on the removal of`
			`these cutsets. Nodes in a cutset belong to both sides`
			`of the induced cut.`

			`4. If the graph is neither complete nor trivial, return to 1;`
			`else end.`

			`This implementation also uses some heuristics (see [3]_ for details)`
			`to speed up the computation.`

			`See also`
			`--------`
			`node_connectivity`
			`all_node_cuts`
			`biconnected_components : special case of this function when k=2`
			`k_edge_components : similar to this function, but uses edge-connectivity`
			`instead of node-connectivity`

			`References`
			`----------`
			`.. [1] Moody, J. and D. White (2003). Social cohesion and embeddedness:`
			`A hierarchical conception of social groups.`
			`American Sociological Review 68(1), 103--28.`
			`http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf`

			`.. [2] Kanevsky, A. (1993). Finding all minimum-size separating vertex`
			`sets in a graph. Networks 23(6), 533--541.`
			`http://onlinelibrary.wiley.com/doi/10.1002/net.3230230604/abstract`

			`.. [3] Torrents, J. and F. Ferraro (2015). Structural Cohesion:`
			`Visualization and Heuristics for Fast Computation.`
			`https://arxiv.org/pdf/1503.04476v1`

			`"""`
			`# Dictionary with connectivity level (k) as keys and a list of`
			`# sets of nodes that form a k-component as values. Note that`
			`# k-compoents can overlap (but only k - 1 nodes).`
			`k_components = defaultdict(list)`
			`# Define default flow function`
			`if flow_func is None:`
			`flow_func = default_flow_func`
			`# Bicomponents as a base to check for higher order k-components`
			`for component in nx.connected_components(G):`
			`# isolated nodes have connectivity 0`
			`comp = set(component)`
			`if len(comp) > 1:`
			`k_components[1].append(comp)`
			`bicomponents = [G.subgraph(c) for c in nx.biconnected_components(G)]`
			`for bicomponent in bicomponents:`
			`bicomp = set(bicomponent)`
			`# avoid considering dyads as bicomponents`
			`if len(bicomp) > 2:`
			`k_components[2].append(bicomp)`
			`for B in bicomponents:`
			`if len(B) <= 2:`
			`continue`
			`k = nx.node_connectivity(B, flow_func=flow_func)`
			`if k > 2:`
			`k_components[k].append(set(B))`
			`# Perform cuts in a DFS like order.`
			`cuts = list(nx.all_node_cuts(B, k=k, flow_func=flow_func))`
			`stack = [(k, _generate_partition(B, cuts, k))]`
			`while stack:`
			`(parent_k, partition) = stack[-1]`
			`try:`
			`nodes = next(partition)`
			`C = B.subgraph(nodes)`
			`this_k = nx.node_connectivity(C, flow_func=flow_func)`
			`if this_k > parent_k and this_k > 2:`
			`k_components[this_k].append(set(C))`
			`cuts = list(nx.all_node_cuts(C, k=this_k, flow_func=flow_func))`
			`if cuts:`
			`stack.append((this_k, _generate_partition(C, cuts, this_k)))`
			`except StopIteration:`
			`stack.pop()`

			`# This is necessary because k-components may only be reported at their`
			`# maximum k level. But we want to return a dictionary in which keys are`
			`# connectivity levels and values list of sets of components, without`
			`# skipping any connectivity level. Also, it's possible that subsets of`
			`# an already detected k-component appear at a level k. Checking for this`
			`# in the while loop above penalizes the common case. Thus we also have to`
			`# _consolidate all connectivity levels in _reconstruct_k_components.`
			`return _reconstruct_k_components(k_components)`


			`def _consolidate(sets, k):`
			`"""Merge sets that share k or more elements.`

			`See: http://rosettacode.org/wiki/Set_consolidation`

			`The iterative python implementation posted there is`
			`faster than this because of the overhead of building a`
			`Graph and calling nx.connected_components, but it's not`
			`clear for us if we can use it in NetworkX because there`
			`is no licence for the code.`

			`"""`
			`G = nx.Graph()`
			`nodes = {i: s for i, s in enumerate(sets)}`
			`G.add_nodes_from(nodes)`
			`G.add_edges_from((u, v) for u, v in combinations(nodes, 2)`
			`if len(nodes[u] & nodes[v]) >= k)`
			`for component in nx.connected_components(G):`
			`yield set.union(*[nodes[n] for n in component])`


			`def _generate_partition(G, cuts, k):`
			`def has_nbrs_in_partition(G, node, partition):`
			`for n in G[node]:`
			`if n in partition:`
			`return True`
			`return False`
			`components = []`
			`nodes = ({n for n, d in G.degree() if d > k} -`
			`{n for cut in cuts for n in cut})`
			`H = G.subgraph(nodes)`
			`for cc in nx.connected_components(H):`
			`component = set(cc)`
			`for cut in cuts:`
			`for node in cut:`
			`if has_nbrs_in_partition(G, node, cc):`
			`component.add(node)`
			`if len(component) < G.order():`
			`components.append(component)`
			`for component in _consolidate(components, k + 1):`
			`yield component`


			`def _reconstruct_k_components(k_comps):`
			`result = dict()`
			`max_k = max(k_comps)`
			`for k in reversed(range(1, max_k + 1)):`
			`if k == max_k:`
			`result[k] = list(_consolidate(k_comps[k], k))`
			`elif k not in k_comps:`
			`result[k] = list(_consolidate(result[k + 1], k))`
			`else:`
			`nodes_at_k = set.union(*k_comps[k])`
			`to_add = [c for c in result[k + 1] if any(n not in nodes_at_k for n in c)]`
			`if to_add:`
			`result[k] = list(_consolidate(k_comps[k] + to_add, k))`
			`else:`
			`result[k] = list(_consolidate(k_comps[k], k))`
			`return result`


			`def build_k_number_dict(kcomps):`
			`result = {}`
			`for k, comps in sorted(kcomps.items(), key=itemgetter(0)):`
			`for comp in comps:`
			`for node in comp:`
			`result[node] = k`
			`return result`