# Original author: D. Eppstein, UC Irvine, August 12, 2003. # The original code at http://www.ics.uci.edu/~eppstein/PADS/ is public domain. # Copyright (C) 2004-2019 by # Aric Hagberg # Dan Schult # Pieter Swart # Tomas Gavenciak # All rights reserved. # BSD license. # # Authors: Tomas Gavenciak # Aric Hagberg """Functions for reading and writing graphs in the *sparse6* format. The *sparse6* file format is a space-efficient format for large sparse graphs. For small graphs or large dense graphs, use the *graph6* file format. For more information, see the `sparse6`_ homepage. .. _sparse6: http://users.cecs.anu.edu.au/~bdm/data/formats.html """ from itertools import chain import math import sys import networkx as nx from networkx.exception import NetworkXError from networkx.utils import open_file, not_implemented_for from networkx.readwrite.graph6 import data_to_n, n_to_data __all__ = ['from_sparse6_bytes', 'read_sparse6', 'to_sparse6_bytes', 'write_sparse6'] def _generate_sparse6_bytes(G, nodes, header): """Yield bytes in the sparse6 encoding of a graph. `G` is an undirected simple graph. `nodes` is the list of nodes for which the node-induced subgraph will be encoded; if `nodes` is the list of all nodes in the graph, the entire graph will be encoded. `header` is a Boolean that specifies whether to generate the header ``b'>>sparse6<<'`` before the remaining data. This function generates `bytes` objects in the following order: 1. the header (if requested), 2. the encoding of the number of nodes, 3. each character, one-at-a-time, in the encoding of the requested node-induced subgraph, 4. a newline character. This function raises :exc:`ValueError` if the graph is too large for the graph6 format (that is, greater than ``2 ** 36`` nodes). """ n = len(G) if n >= 2 ** 36: raise ValueError('sparse6 is only defined if number of nodes is less ' 'than 2 ** 36') if header: yield b'>>sparse6<<' yield b':' for d in n_to_data(n): yield str.encode(chr(d + 63)) k = 1 while 1 << k < n: k += 1 def enc(x): """Big endian k-bit encoding of x""" return [1 if (x & 1 << (k - 1 - i)) else 0 for i in range(k)] edges = sorted((max(u, v), min(u, v)) for u, v in G.edges()) bits = [] curv = 0 for (v, u) in edges: if v == curv: # current vertex edge bits.append(0) bits.extend(enc(u)) elif v == curv + 1: # next vertex edge curv += 1 bits.append(1) bits.extend(enc(u)) else: # skip to vertex v and then add edge to u curv = v bits.append(1) bits.extend(enc(v)) bits.append(0) bits.extend(enc(u)) if k < 6 and n == (1 << k) and ((-len(bits)) % 6) >= k and curv < (n - 1): # Padding special case: small k, n=2^k, # more than k bits of padding needed, # current vertex is not (n-1) -- # appending 1111... would add a loop on (n-1) bits.append(0) bits.extend([1] * ((-len(bits)) % 6)) else: bits.extend([1] * ((-len(bits)) % 6)) data = [(bits[i + 0] << 5) + (bits[i + 1] << 4) + (bits[i + 2] << 3) + (bits[i + 3] << 2) + (bits[i + 4] << 1) + (bits[i + 5] << 0) for i in range(0, len(bits), 6)] for d in data: yield str.encode(chr(d + 63)) yield b'\n' def from_sparse6_bytes(string): """Read an undirected graph in sparse6 format from string. Parameters ---------- string : string Data in sparse6 format Returns ------- G : Graph Raises ------ NetworkXError If the string is unable to be parsed in sparse6 format Examples -------- >>> G = nx.from_sparse6_bytes(b':A_') >>> sorted(G.edges()) [(0, 1), (0, 1), (0, 1)] See Also -------- read_sparse6, write_sparse6 References ---------- .. [1] Sparse6 specification """ if string.startswith(b'>>sparse6<<'): string = string[11:] if not string.startswith(b':'): raise NetworkXError('Expected leading colon in sparse6') if sys.version_info < (3, ): chars = [ord(c) - 63 for c in string[1:]] else: chars = [c - 63 for c in string[1:]] n, data = data_to_n(chars) k = 1 while 1 << k < n: k += 1 def parseData(): """Returns stream of pairs b[i], x[i] for sparse6 format.""" chunks = iter(data) d = None # partial data word dLen = 0 # how many unparsed bits are left in d while 1: if dLen < 1: try: d = next(chunks) except StopIteration: return dLen = 6 dLen -= 1 b = (d >> dLen) & 1 # grab top remaining bit x = d & ((1 << dLen) - 1) # partially built up value of x xLen = dLen # how many bits included so far in x while xLen < k: # now grab full chunks until we have enough try: d = next(chunks) except StopIteration: return dLen = 6 x = (x << 6) + d xLen += 6 x = (x >> (xLen - k)) # shift back the extra bits dLen = xLen - k yield b, x v = 0 G = nx.MultiGraph() G.add_nodes_from(range(n)) multigraph = False for b, x in parseData(): if b == 1: v += 1 # padding with ones can cause overlarge number here if x >= n or v >= n: break elif x > v: v = x else: if G.has_edge(x, v): multigraph = True G.add_edge(x, v) if not multigraph: G = nx.Graph(G) return G def to_sparse6_bytes(G, nodes=None, header=True): """Convert an undirected graph to bytes in sparse6 format. Parameters ---------- G : Graph (undirected) nodes: list or iterable Nodes are labeled 0...n-1 in the order provided. If None the ordering given by ``G.nodes()`` is used. header: bool If True add '>>sparse6<<' bytes to head of data. Raises ------ NetworkXNotImplemented If the graph is directed. ValueError If the graph has at least ``2 ** 36`` nodes; the sparse6 format is only defined for graphs of order less than ``2 ** 36``. Examples -------- >>> nx.to_sparse6_bytes(nx.path_graph(2)) # doctest: +SKIP b'>>sparse6<<:An\\n' See Also -------- to_sparse6_bytes, read_sparse6, write_sparse6_bytes Notes ----- The returned bytes end with a newline character. The format does not support edge or node labels. References ---------- .. [1] Graph6 specification """ if nodes is not None: G = G.subgraph(nodes) G = nx.convert_node_labels_to_integers(G, ordering='sorted') return b''.join(_generate_sparse6_bytes(G, nodes, header)) @open_file(0, mode='rb') def read_sparse6(path): """Read an undirected graph in sparse6 format from path. Parameters ---------- path : file or string File or filename to write. Returns ------- G : Graph/Multigraph or list of Graphs/MultiGraphs If the file contains multiple lines then a list of graphs is returned Raises ------ NetworkXError If the string is unable to be parsed in sparse6 format Examples -------- You can read a sparse6 file by giving the path to the file:: >>> import tempfile >>> with tempfile.NamedTemporaryFile() as f: ... _ = f.write(b'>>sparse6<<:An\\n') ... _ = f.seek(0) ... G = nx.read_sparse6(f.name) >>> list(G.edges()) [(0, 1)] You can also read a sparse6 file by giving an open file-like object:: >>> import tempfile >>> with tempfile.NamedTemporaryFile() as f: ... _ = f.write(b'>>sparse6<<:An\\n') ... _ = f.seek(0) ... G = nx.read_sparse6(f) >>> list(G.edges()) [(0, 1)] See Also -------- read_sparse6, from_sparse6_bytes References ---------- .. [1] Sparse6 specification """ glist = [] for line in path: line = line.strip() if not len(line): continue glist.append(from_sparse6_bytes(line)) if len(glist) == 1: return glist[0] else: return glist @not_implemented_for('directed') @open_file(1, mode='wb') def write_sparse6(G, path, nodes=None, header=True): """Write graph G to given path in sparse6 format. Parameters ---------- G : Graph (undirected) path : file or string File or filename to write nodes: list or iterable Nodes are labeled 0...n-1 in the order provided. If None the ordering given by G.nodes() is used. header: bool If True add '>>sparse6<<' string to head of data Raises ------ NetworkXError If the graph is directed Examples -------- You can write a sparse6 file by giving the path to the file:: >>> import tempfile >>> with tempfile.NamedTemporaryFile() as f: ... nx.write_sparse6(nx.path_graph(2), f.name) ... print(f.read()) # doctest: +SKIP b'>>sparse6<<:An\\n' You can also write a sparse6 file by giving an open file-like object:: >>> with tempfile.NamedTemporaryFile() as f: ... nx.write_sparse6(nx.path_graph(2), f) ... _ = f.seek(0) ... print(f.read()) # doctest: +SKIP b'>>sparse6<<:An\\n' See Also -------- read_sparse6, from_sparse6_bytes Notes ----- The format does not support edge or node labels. References ---------- .. [1] Sparse6 specification """ if nodes is not None: G = G.subgraph(nodes) G = nx.convert_node_labels_to_integers(G, ordering='sorted') for b in _generate_sparse6_bytes(G, nodes, header): path.write(b)