This repository has been archived on 2023-03-25. You can view files and clone it, but cannot push or open issues or pull requests.
2020-07-30 01:16:18 +02:00

484 lines
18 KiB
Python

#!/usr/bin/env python
import pytest
import networkx as nx
from networkx.algorithms.similarity import *
from networkx.generators.classic import *
def nmatch(n1, n2):
    """Return True when the two node arguments compare equal.

    Passed as the ``node_match`` callback to ``graph_edit_distance`` in the
    tests below, so a substitution is free only when both sides are equal.
    """
    return n1 == n2
def ematch(e1, e2):
    """Return True when the two edge arguments compare equal.

    Passed as the ``edge_match`` callback to ``graph_edit_distance`` in the
    tests below, so a substitution is free only when both sides are equal.
    """
    return e1 == e2
def getCanonical():
    """Build the reference graph shared by the ``testGraph*`` cases.

    The graph has nodes A, B, C and D, each carrying a ``label`` attribute
    equal to its name, and the labelled edges A-B, B-C and B-D.
    """
    G = nx.Graph()
    for name in ('A', 'B', 'C', 'D'):
        G.add_node(name, label=name)
    G.add_edge('A', 'B', label='a-b')
    G.add_edge('B', 'C', label='b-c')
    G.add_edge('B', 'D', label='b-d')
    return G
class TestSimilarity:
    """Tests for graph edit distance and SimRank similarity."""

    @classmethod
    def setup_class(cls):
        """Import numpy and scipy into module globals, skipping if missing."""
        global numpy
        global scipy
        numpy = pytest.importorskip('numpy')
        scipy = pytest.importorskip('scipy')

    def test_graph_edit_distance(self):
        """Distances between empty, path, cycle and wheel graphs, both ways."""
        G0 = nx.Graph()
        G1 = path_graph(6)
        G2 = cycle_graph(6)
        G3 = wheel_graph(7)

        assert graph_edit_distance(G0, G0) == 0
        assert graph_edit_distance(G0, G1) == 11
        assert graph_edit_distance(G1, G0) == 11
        assert graph_edit_distance(G0, G2) == 12
        assert graph_edit_distance(G2, G0) == 12
        assert graph_edit_distance(G0, G3) == 19
        assert graph_edit_distance(G3, G0) == 19

        assert graph_edit_distance(G1, G1) == 0
        assert graph_edit_distance(G1, G2) == 1
        assert graph_edit_distance(G2, G1) == 1
        assert graph_edit_distance(G1, G3) == 8
        assert graph_edit_distance(G3, G1) == 8

        assert graph_edit_distance(G2, G2) == 0
        assert graph_edit_distance(G2, G3) == 7
        assert graph_edit_distance(G3, G2) == 7

        assert graph_edit_distance(G3, G3) == 0

    def test_graph_edit_distance_node_match(self):
        """A colour-aware ``node_match`` makes two 5-cycles differ by one."""
        G1 = cycle_graph(5)
        G2 = cycle_graph(5)
        for n, attr in G1.nodes.items():
            attr['color'] = 'red' if n % 2 == 0 else 'blue'
        for n, attr in G2.nodes.items():
            attr['color'] = 'red' if n % 2 == 1 else 'blue'

        # Without a matcher the attributes are ignored.
        assert graph_edit_distance(G1, G2) == 0
        assert graph_edit_distance(
            G1, G2,
            node_match=lambda n1, n2: n1['color'] == n2['color'],
        ) == 1

    def test_graph_edit_distance_edge_match(self):
        """A colour-aware ``edge_match`` makes two 6-paths differ by two."""
        G1 = path_graph(6)
        G2 = path_graph(6)
        for e, attr in G1.edges.items():
            attr['color'] = 'red' if min(e) % 2 == 0 else 'blue'
        for e, attr in G2.edges.items():
            attr['color'] = 'red' if min(e) // 3 == 0 else 'blue'

        # Without a matcher the attributes are ignored.
        assert graph_edit_distance(G1, G2) == 0
        assert graph_edit_distance(
            G1, G2,
            edge_match=lambda e1, e2: e1['color'] == e2['color'],
        ) == 2

    def test_graph_edit_distance_node_cost(self):
        """Custom node substitution/deletion/insertion costs are honoured."""
        G1 = path_graph(6)
        G2 = path_graph(6)
        for n, attr in G1.nodes.items():
            attr['color'] = 'red' if n % 2 == 0 else 'blue'
        for n, attr in G2.nodes.items():
            attr['color'] = 'red' if n % 2 == 1 else 'blue'

        def node_subst_cost(uattr, vattr):
            if uattr['color'] == vattr['color']:
                return 1
            else:
                return 10

        def node_del_cost(attr):
            if attr['color'] == 'blue':
                return 20
            else:
                return 50

        def node_ins_cost(attr):
            if attr['color'] == 'blue':
                return 40
            else:
                return 100

        assert graph_edit_distance(
            G1, G2,
            node_subst_cost=node_subst_cost,
            node_del_cost=node_del_cost,
            node_ins_cost=node_ins_cost,
        ) == 6

    def test_graph_edit_distance_edge_cost(self):
        """Custom edge substitution/deletion/insertion costs are honoured."""
        G1 = path_graph(6)
        G2 = path_graph(6)
        for e, attr in G1.edges.items():
            attr['color'] = 'red' if min(e) % 2 == 0 else 'blue'
        for e, attr in G2.edges.items():
            attr['color'] = 'red' if min(e) // 3 == 0 else 'blue'

        def edge_subst_cost(gattr, hattr):
            if gattr['color'] == hattr['color']:
                return 0.01
            else:
                return 0.1

        def edge_del_cost(attr):
            if attr['color'] == 'blue':
                return 0.2
            else:
                return 0.5

        def edge_ins_cost(attr):
            if attr['color'] == 'blue':
                return 0.4
            else:
                return 1.0

        # The result is a sum of binary floats, so compare with a tolerance
        # instead of relying on the exact rounding of the accumulation.
        assert graph_edit_distance(
            G1, G2,
            edge_subst_cost=edge_subst_cost,
            edge_del_cost=edge_del_cost,
            edge_ins_cost=edge_ins_cost,
        ) == pytest.approx(0.23)

    def test_graph_edit_distance_upper_bound(self):
        """``upper_bound`` below the true distance yields ``None``."""
        G1 = circular_ladder_graph(2)
        G2 = circular_ladder_graph(6)
        assert graph_edit_distance(G1, G2, upper_bound=5) is None
        assert graph_edit_distance(G1, G2, upper_bound=24) == 22
        assert graph_edit_distance(G1, G2) == 22

    def test_optimal_edit_paths(self):
        """All six minimum-cost edit paths from a 3-path to a 3-cycle."""
        G1 = path_graph(3)
        G2 = cycle_graph(3)
        paths, cost = optimal_edit_paths(G1, G2)
        assert cost == 1
        assert len(paths) == 6

        def canonical(vertex_path, edge_path):
            # Order-independent, hashable form of one edit path; pairs that
            # contain None (insertions/deletions) are sorted last.
            return (
                tuple(sorted(vertex_path)),
                tuple(sorted(edge_path, key=lambda x: (None in x, x))),
            )

        expected_paths = [
            ([(0, 0), (1, 1), (2, 2)], [((0, 1), (0, 1)), ((1, 2), (1, 2)), (None, (0, 2))]),
            ([(0, 0), (1, 2), (2, 1)], [((0, 1), (0, 2)), ((1, 2), (1, 2)), (None, (0, 1))]),
            ([(0, 1), (1, 0), (2, 2)], [((0, 1), (0, 1)), ((1, 2), (0, 2)), (None, (1, 2))]),
            ([(0, 1), (1, 2), (2, 0)], [((0, 1), (1, 2)), ((1, 2), (0, 2)), (None, (0, 1))]),
            ([(0, 2), (1, 0), (2, 1)], [((0, 1), (0, 2)), ((1, 2), (0, 1)), (None, (1, 2))]),
            ([(0, 2), (1, 1), (2, 0)], [((0, 1), (1, 2)), ((1, 2), (0, 1)), (None, (0, 2))]),
        ]
        assert ({canonical(*p) for p in paths} ==
                {canonical(*p) for p in expected_paths})

    def test_optimize_graph_edit_distance(self):
        """Successive approximations strictly decrease and end at 22."""
        G1 = circular_ladder_graph(2)
        G2 = circular_ladder_graph(6)
        bestcost = 1000
        for cost in optimize_graph_edit_distance(G1, G2):
            assert cost < bestcost
            bestcost = cost
        assert bestcost == 22

    # NOTE(review): disabled in the original file; presumably too slow to run
    # routinely -- confirm before re-enabling. It also calls assert_equal,
    # which is not imported in the visible part of this module.
    # def test_graph_edit_distance_bigger(self):
    #     G1 = circular_ladder_graph(12)
    #     G2 = circular_ladder_graph(16)
    #     assert_equal(graph_edit_distance(G1, G2), 22)

    def test_selfloops(self):
        """Distances between undirected graphs that contain self-loops."""
        G0 = nx.Graph()
        G1 = nx.Graph()
        G1.add_edges_from((('A', 'A'), ('A', 'B')))
        G2 = nx.Graph()
        G2.add_edges_from((('A', 'B'), ('B', 'B')))
        G3 = nx.Graph()
        G3.add_edges_from((('A', 'A'), ('A', 'B'), ('B', 'B')))

        assert graph_edit_distance(G0, G0) == 0
        assert graph_edit_distance(G0, G1) == 4
        assert graph_edit_distance(G1, G0) == 4
        assert graph_edit_distance(G0, G2) == 4
        assert graph_edit_distance(G2, G0) == 4
        assert graph_edit_distance(G0, G3) == 5
        assert graph_edit_distance(G3, G0) == 5

        assert graph_edit_distance(G1, G1) == 0
        assert graph_edit_distance(G1, G2) == 0
        assert graph_edit_distance(G2, G1) == 0
        assert graph_edit_distance(G1, G3) == 1
        assert graph_edit_distance(G3, G1) == 1

        assert graph_edit_distance(G2, G2) == 0
        assert graph_edit_distance(G2, G3) == 1
        assert graph_edit_distance(G3, G2) == 1

        assert graph_edit_distance(G3, G3) == 0

    def test_digraph(self):
        """Distances between four-node directed graphs."""
        G0 = nx.DiGraph()
        G1 = nx.DiGraph()
        G1.add_edges_from((('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'A')))
        G2 = nx.DiGraph()
        G2.add_edges_from((('A', 'B'), ('B', 'C'), ('C', 'D'), ('A', 'D')))
        G3 = nx.DiGraph()
        G3.add_edges_from((('A', 'B'), ('A', 'C'), ('B', 'D'), ('C', 'D')))

        assert graph_edit_distance(G0, G0) == 0
        assert graph_edit_distance(G0, G1) == 8
        assert graph_edit_distance(G1, G0) == 8
        assert graph_edit_distance(G0, G2) == 8
        assert graph_edit_distance(G2, G0) == 8
        assert graph_edit_distance(G0, G3) == 8
        assert graph_edit_distance(G3, G0) == 8

        assert graph_edit_distance(G1, G1) == 0
        assert graph_edit_distance(G1, G2) == 2
        assert graph_edit_distance(G2, G1) == 2
        assert graph_edit_distance(G1, G3) == 4
        assert graph_edit_distance(G3, G1) == 4

        assert graph_edit_distance(G2, G2) == 0
        assert graph_edit_distance(G2, G3) == 2
        assert graph_edit_distance(G3, G2) == 2

        assert graph_edit_distance(G3, G3) == 0

    def test_multigraph(self):
        """Distances between multigraphs that differ in parallel edges."""
        G0 = nx.MultiGraph()
        G1 = nx.MultiGraph()
        G1.add_edges_from((('A', 'B'), ('B', 'C'), ('A', 'C')))
        G2 = nx.MultiGraph()
        G2.add_edges_from((('A', 'B'), ('B', 'C'), ('B', 'C'), ('A', 'C')))
        G3 = nx.MultiGraph()
        G3.add_edges_from((('A', 'B'), ('B', 'C'), ('A', 'C'), ('A', 'C'), ('A', 'C')))

        assert graph_edit_distance(G0, G0) == 0
        assert graph_edit_distance(G0, G1) == 6
        assert graph_edit_distance(G1, G0) == 6
        assert graph_edit_distance(G0, G2) == 7
        assert graph_edit_distance(G2, G0) == 7
        assert graph_edit_distance(G0, G3) == 8
        assert graph_edit_distance(G3, G0) == 8

        assert graph_edit_distance(G1, G1) == 0
        assert graph_edit_distance(G1, G2) == 1
        assert graph_edit_distance(G2, G1) == 1
        assert graph_edit_distance(G1, G3) == 2
        assert graph_edit_distance(G3, G1) == 2

        assert graph_edit_distance(G2, G2) == 0
        assert graph_edit_distance(G2, G3) == 1
        assert graph_edit_distance(G3, G2) == 1

        assert graph_edit_distance(G3, G3) == 0

    def test_multidigraph(self):
        """Distance between two directed multigraphs, in both directions."""
        G1 = nx.MultiDiGraph()
        G1.add_edges_from((
            ('hardware', 'kernel'),
            ('kernel', 'hardware'),
            ('kernel', 'userspace'),
            ('userspace', 'kernel'),
        ))
        G2 = nx.MultiDiGraph()
        G2.add_edges_from((
            ('winter', 'spring'),
            ('spring', 'summer'),
            ('summer', 'autumn'),
            ('autumn', 'winter'),
        ))

        assert graph_edit_distance(G1, G2) == 5
        assert graph_edit_distance(G2, G1) == 5

    # by https://github.com/jfbeaumont

    def testCopy(self):
        """A graph and its copy are at distance zero."""
        G = nx.Graph()
        G.add_node('A', label='A')
        G.add_node('B', label='B')
        G.add_edge('A', 'B', label='a-b')
        assert graph_edit_distance(G, G.copy(), node_match=nmatch, edge_match=ematch) == 0

    def testSame(self):
        """Two independently built identical graphs are at distance zero."""
        G1 = nx.Graph()
        G1.add_node('A', label='A')
        G1.add_node('B', label='B')
        G1.add_edge('A', 'B', label='a-b')
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_edge('A', 'B', label='a-b')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 0

    def testOneEdgeLabelDiff(self):
        """One differing edge label costs one edit."""
        G1 = nx.Graph()
        G1.add_node('A', label='A')
        G1.add_node('B', label='B')
        G1.add_edge('A', 'B', label='a-b')
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_edge('A', 'B', label='bad')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1

    def testOneNodeLabelDiff(self):
        """One differing node label costs one edit."""
        G1 = nx.Graph()
        G1.add_node('A', label='A')
        G1.add_node('B', label='B')
        G1.add_edge('A', 'B', label='a-b')
        G2 = nx.Graph()
        G2.add_node('A', label='Z')
        G2.add_node('B', label='B')
        G2.add_edge('A', 'B', label='a-b')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1

    def testOneExtraNode(self):
        """One additional isolated node costs one edit."""
        G1 = nx.Graph()
        G1.add_node('A', label='A')
        G1.add_node('B', label='B')
        G1.add_edge('A', 'B', label='a-b')
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_edge('A', 'B', label='a-b')
        G2.add_node('C', label='C')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1

    def testOneExtraEdge(self):
        """One additional edge between existing nodes costs one edit."""
        G1 = nx.Graph()
        G1.add_node('A', label='A')
        G1.add_node('B', label='B')
        G1.add_node('C', label='C')
        G1.add_edge('A', 'B', label='a-b')
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_node('C', label='C')
        G2.add_edge('A', 'B', label='a-b')
        G2.add_edge('A', 'C', label='a-c')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1

    def testOneExtraNodeAndEdge(self):
        """One additional node plus its connecting edge costs two edits."""
        G1 = nx.Graph()
        G1.add_node('A', label='A')
        G1.add_node('B', label='B')
        G1.add_edge('A', 'B', label='a-b')
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_node('C', label='C')
        G2.add_edge('A', 'B', label='a-b')
        G2.add_edge('A', 'C', label='a-c')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2

    def testGraph1(self):
        """Canonical graph versus the path A-B-D-E."""
        G1 = getCanonical()
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_node('D', label='D')
        G2.add_node('E', label='E')
        G2.add_edge('A', 'B', label='a-b')
        G2.add_edge('B', 'D', label='b-d')
        G2.add_edge('D', 'E', label='d-e')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 3

    def testGraph2(self):
        """Canonical graph versus a five-node tree branching at C."""
        G1 = getCanonical()
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_node('C', label='C')
        G2.add_node('D', label='D')
        G2.add_node('E', label='E')
        G2.add_edge('A', 'B', label='a-b')
        G2.add_edge('B', 'C', label='b-c')
        G2.add_edge('C', 'D', label='c-d')
        G2.add_edge('C', 'E', label='c-e')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 4

    def testGraph3(self):
        """Canonical graph versus a seven-node, six-edge graph."""
        G1 = getCanonical()
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_node('C', label='C')
        G2.add_node('D', label='D')
        G2.add_node('E', label='E')
        G2.add_node('F', label='F')
        G2.add_node('G', label='G')
        G2.add_edge('A', 'C', label='a-c')
        G2.add_edge('A', 'D', label='a-d')
        G2.add_edge('D', 'E', label='d-e')
        G2.add_edge('D', 'F', label='d-f')
        G2.add_edge('D', 'G', label='d-g')
        G2.add_edge('E', 'B', label='e-b')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 12

    def testGraph4(self):
        """Canonical graph versus the path A-B-C-D."""
        G1 = getCanonical()
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_node('C', label='C')
        G2.add_node('D', label='D')
        G2.add_edge('A', 'B', label='a-b')
        G2.add_edge('B', 'C', label='b-c')
        G2.add_edge('C', 'D', label='c-d')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2

    def testGraph4_a(self):
        """Canonical graph with edge B-D replaced by A-D."""
        G1 = getCanonical()
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_node('C', label='C')
        G2.add_node('D', label='D')
        G2.add_edge('A', 'B', label='a-b')
        G2.add_edge('B', 'C', label='b-c')
        G2.add_edge('A', 'D', label='a-d')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2

    def testGraph4_b(self):
        """Canonical graph with only the B-D edge label changed."""
        G1 = getCanonical()
        G2 = nx.Graph()
        G2.add_node('A', label='A')
        G2.add_node('B', label='B')
        G2.add_node('C', label='C')
        G2.add_node('D', label='D')
        G2.add_edge('A', 'B', label='a-b')
        G2.add_edge('B', 'C', label='b-c')
        G2.add_edge('B', 'D', label='bad')
        assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1

    def test_simrank_no_source_no_target(self):
        """Full SimRank similarity table for a 5-cycle."""
        G = nx.cycle_graph(5)
        expected = {
            0: {0: 1, 1: 0.3951219505902448, 2: 0.5707317069281646, 3: 0.5707317069281646, 4: 0.3951219505902449},
            1: {0: 0.3951219505902448, 1: 1, 2: 0.3951219505902449, 3: 0.5707317069281646, 4: 0.5707317069281646},
            2: {0: 0.5707317069281646, 1: 0.3951219505902449, 2: 1, 3: 0.3951219505902449, 4: 0.5707317069281646},
            3: {0: 0.5707317069281646, 1: 0.5707317069281646, 2: 0.3951219505902449, 3: 1, 4: 0.3951219505902449},
            4: {0: 0.3951219505902449, 1: 0.5707317069281646, 2: 0.5707317069281646, 3: 0.3951219505902449, 4: 1},
        }
        actual = nx.simrank_similarity(G)
        # Compare with a tolerance: the reference values already differ in
        # their last digit (...448 vs ...449), so exact float equality would
        # tie the test to one particular rounding of the computation.
        assert set(actual) == set(expected)
        for node, row in expected.items():
            assert actual[node] == pytest.approx(row)

    def test_simrank_source_no_target(self):
        """SimRank similarities from node 0 to every node of a 5-cycle."""
        G = nx.cycle_graph(5)
        expected = {0: 1, 1: 0.3951219505902448, 2: 0.5707317069281646, 3: 0.5707317069281646, 4: 0.3951219505902449}
        actual = nx.simrank_similarity(G, source=0)
        # Tolerant comparison; see the last-digit spread in ``expected``.
        assert actual == pytest.approx(expected)

    def test_simrank_source_and_target(self):
        """SimRank similarity of node 0 with itself is 1."""
        G = nx.cycle_graph(5)
        expected = 1
        actual = nx.simrank_similarity(G, source=0, target=0)
        assert expected == actual

    def test_simrank_numpy_no_source_no_target(self):
        """Full SimRank matrix for a 5-cycle from the numpy implementation."""
        G = nx.cycle_graph(5)
        expected = numpy.array([
            [1.0, 0.3947180735764555, 0.570482097206368, 0.570482097206368, 0.3947180735764555],
            [0.3947180735764555, 1.0, 0.3947180735764555, 0.570482097206368, 0.570482097206368],
            [0.570482097206368, 0.3947180735764555, 1.0, 0.3947180735764555, 0.570482097206368],
            [0.570482097206368, 0.570482097206368, 0.3947180735764555, 1.0, 0.3947180735764555],
            [0.3947180735764555, 0.570482097206368, 0.570482097206368, 0.3947180735764555, 1.0]
        ])
        actual = nx.simrank_similarity_numpy(G)
        numpy.testing.assert_allclose(expected, actual, atol=1e-7)

    def test_simrank_numpy_source_no_target(self):
        """SimRank row for node 0 of a 5-cycle from the numpy implementation."""
        G = nx.cycle_graph(5)
        expected = numpy.array(
            [1.0, 0.3947180735764555, 0.570482097206368, 0.570482097206368, 0.3947180735764555],
        )
        actual = nx.simrank_similarity_numpy(G, source=0)
        numpy.testing.assert_allclose(expected, actual, atol=1e-7)

    def test_simrank_numpy_source_and_target(self):
        """SimRank of node 0 with itself from the numpy implementation."""
        G = nx.cycle_graph(5)
        expected = 1.0
        actual = nx.simrank_similarity_numpy(G, source=0, target=0)
        numpy.testing.assert_allclose(expected, actual, atol=1e-7)