diff --git a/gerrychain/constraints/contiguity.py b/gerrychain/constraints/contiguity.py index 81674b84..1c92e6f8 100644 --- a/gerrychain/constraints/contiguity.py +++ b/gerrychain/constraints/contiguity.py @@ -8,14 +8,6 @@ from ..graph import Graph -# frm TODO: Remove this comment about NX dependencies (once we are all set with the work) -# -# NX dependencies: -# def _are_reachable(G: nx.Graph, ...) -# nx.is_connected(partition.subgraphs[part]) for part in _affected_parts(partition) -# adj = nx.to_dict_of_lists(partition.subgraphs[part]) -# - # frm: TODO: Think about the efficiency of the routines in this module. Almost all # of these involve traversing the entire graph, and I fear that callers # might make multiple calls. @@ -23,6 +15,16 @@ # Possible solutions are to 1) speed up these routines somehow and 2) cache # results so that at least we don't do the traversals over and over. +# frm: TODO: Rethink what this module is actually responsible for. +# +# It seems like a grab bag for lots of different things - used in different places. +# +# What got me to write this comment was looking at the signature for def contiguous(), +# which operates on a partition, while lots of other routines here operate on graphs or +# other things. The module's scope needs to be pinned down. +# + + def _are_reachable(graph: Graph, start_node: Any, avoid: Callable, targets: Any) -> bool: """ A modified version of NetworkX's function @@ -101,8 +103,6 @@ def _are_reachable(graph: Graph, start_node: Any, avoid: Callable, targets: Any) continue # already searched this node. node_distances[node_id] = distance - dbg_neighbors = graph.neighbors(node_id) - for neighbor in graph.neighbors(node_id): if avoid(node_id, neighbor): continue @@ -360,12 +360,11 @@ def _bfs(graph: Dict[int, list]) -> bool: """ q = [next(iter(graph))] visited = set() - # frm TODO: Make sure len() is defined on Graph object... - total_vertices = len(graph) + num_nodes = len(graph) # Check if the district has a single vertex. If it does, then simply return # `True`, as it's trivially connected. - if total_vertices <= 1: + if num_nodes <= 1: return True # bfs! @@ -378,10 +377,12 @@ def _bfs(graph: Dict[int, list]) -> bool: visited.add(neighbor) q += [neighbor] - return total_vertices == len(visited) + return num_nodes == len(visited) # frm: TODO: Verify that is_connected_bfs() works - add a test or two... +# frm: TODO: Move this code into graph.py. It is all about the Graph... + # frm: Code obtained from the web - probably could be optimized... # This code replaced calls on nx.is_connected() def is_connected_bfs(graph: Graph): diff --git a/gerrychain/graph/graph.py b/gerrychain/graph/graph.py index ba3ac2ba..3dc58313 100644 --- a/gerrychain/graph/graph.py +++ b/gerrychain/graph/graph.py @@ -32,12 +32,12 @@ from shapely.ops import unary_union from shapely.prepared import prep -import numpy as np -from scipy.sparse import csr_array - +import numpy +import scipy.sparse ######################################################### # frm Overview of changes (May 2025): +# frm: TODO: Revise (or perhaps just delete) this comment / discussion... """ This comment is temporary - it describes the work done to encapsulate dependency on NetworkX so that this file is the only file that has any NetworkX dependencies. @@ -166,6 +166,7 @@ class Graph: """ + # frm: TODO: Update the comment below - making sure it is 100% accurate (and useful) # frm: This class cannot have a constructor - because there is code that assumes # that it can use the default constructor to create instances of it.
# That code is buried deep in non-GerryChain code, so I don't really understand @@ -179,6 +180,9 @@ class Graph: # frm: TODO: Add documentation for new data members I am adding: # _nx_graph, _rx_graph, _node_id_to_parent_node_id_map, _is_a_subgraph + # _node_id_to_original_nx_node_id_map + # => used to recreate an NX graph from an RX graph and also + # as an aid for testing @classmethod def from_networkx(cls, nx_graph: networkx.Graph) -> "Graph": @@ -191,12 +195,31 @@ def from_networkx(cls, nx_graph: networkx.Graph) -> "Graph": graph._node_id_to_parent_node_id_map = {node: node for node in graph.node_indices} # Maps node_ids in the graph to the "original" node_ids in parent graph. # For top-level graphs, this is just an identity map - graph._node_id_to_original_node_id_map = {node: node for node in graph.node_indices} + graph._node_id_to_original_nx_node_id_map = {node: node for node in graph.node_indices} graph.nx_to_rx_node_id_map = None # only set when an NX based graph is converted to be an RX based graph return graph @classmethod def from_rustworkx(cls, rx_graph: rustworkx.PyGraph) -> "Graph": + # This routine is intended to be used to create top level graphs that + # are 1) not subgraphs and 2) not based on NetworkX Graphs. Stated + # differently, subgraphs and RX graphs derived from NetworkX graphs + # need to create translation maps for node_ids (either to the parent + # of a subgraph or the "original" node in a NetworkX Graph), and this + # routine does neither of those things. + + # frm: TODO: Think about node data dictionaries - do I need to check for them? + # + # While NX graphs always have a node data dictionary, RX graphs do not have to + # have any data, and the data does not need to be a data dictionary. Since + # gerrychain code depends on having a data dictionary associated with nodes, + # it probably makes sense to make sure that the rustworkx graph provided + # as a parameter does in fact have a data dictionary for every node in the + # graph... + # + # The same applies for edge data... + # + graph = cls() graph._rx_graph = rx_graph graph._nx_graph = None @@ -204,32 +227,98 @@ # Maps node_ids in the graph to the "parent" node_ids in the parent graph. # For top-level graphs, this is just an identity map graph._node_id_to_parent_node_id_map = {node: node for node in graph.node_indices} - # Maps node_ids in the graph to the "original" node_ids in parent graph. - # For top-level graphs, this is just an identity map - graph._node_id_to_original_node_id_map = {node: node for node in graph.node_indices} + # Maps node_ids in the graph to the "original" NX node_ids. At this point, we + # don't know if this RX based graph was indeed based on an NX graph, so we just + # set this to the identity map - which makes sense if this was not derived from + # an NX-based Graph. However, in the case when the graph is indeed derived from + # an NX-based Graph, it is the responsibility of the caller to set + # the _node_id_to_original_nx_node_id_map appropriately. + graph._node_id_to_original_nx_node_id_map = {node: node for node in graph.node_indices} graph.nx_to_rx_node_id_map = None # only set when an NX based graph is converted to be an RX based graph return graph
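# A minimal sketch (not part of this patch) of the payload check contemplated in
# the TODO above. The helper name is hypothetical and nothing calls it yet; the
# rustworkx calls themselves are real API.
import rustworkx

def check_rx_payloads(rx_graph: rustworkx.PyGraph) -> None:
    # GerryChain assumes every node and edge carries a data dictionary; RX does
    # not enforce that, so a from_rustworkx() caller could validate it up front.
    for node_id in rx_graph.node_indices():
        if not isinstance(rx_graph[node_id], dict):
            raise TypeError(f"node {node_id} payload is not a dict")
    for edge_id in rx_graph.edge_indices():
        if not isinstance(rx_graph.get_edge_data_by_index(edge_id), dict):
            raise TypeError(f"edge {edge_id} payload is not a dict")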
+ def to_networkx_graph(self): + if self.is_nx_graph(): + return self.get_nx_graph() + + if not self.is_rx_graph(): + # frm: TODO: Raise a more specific exception - a TypeError? + raise Exception("to_networkx: bad Graph type") + + # OK - we have an RX-based Graph, so create a NetworkX Graph object + # that has all of the node data and edge data, and which has the + # original node_ids and edge_ids. + # + # Original node_ids are those that were used in the original NX + # Graph used to create the RX-based Graph object. + # + + # Confirm that this RX based graph was derived from an NX graph... + if self._node_id_to_original_nx_node_id_map is None: + raise Exception("to_networkx_graph: _node_id_to_original_nx_node_id_map is None") + + rx_graph = self.get_rx_graph() + + # Extract node data + node_data = [] + for node_id in rx_graph.node_indices(): + node_payload = rx_graph[node_id] + # Get the "original" node_id + original_nx_node_id = self.original_nx_node_id_for_internal_node_id(node_id) + node_data.append({"node_name": original_nx_node_id, **node_payload}) + + # Extract edge data + edge_data = [] + for edge_id in rx_graph.edge_indices(): + edge = rx_graph.get_edge_endpoints_by_index(edge_id) + edge_0_node_id = edge[0] + edge_1_node_id = edge[1] + # Get the "original" node_ids + edge_0_original_nx_node_id = self.original_nx_node_id_for_internal_node_id(edge_0_node_id) + edge_1_original_nx_node_id = self.original_nx_node_id_for_internal_node_id(edge_1_node_id) + edge_payload = rx_graph.get_edge_data_by_index(edge_id) + # Add edges and edge data using the original node_ids + # as the names/IDs for the nodes that make up the edge + edge_data.append({"source": edge_0_original_nx_node_id, "target": edge_1_original_nx_node_id, **edge_payload}) + + # Create Pandas DataFrames + nodes_df = pd.DataFrame(node_data) + edges_df = pd.DataFrame(edge_data) + + # Create a NetworkX Graph object from the edges_df, using + # "source" and "target" to define edge node_ids, and adding + # all attribute data (True). + nx_graph = networkx.from_pandas_edgelist(edges_df, 'source', 'target', True, networkx.Graph) + + # Add all of the node_data, using the "node_name" attr as the NX Graph node_id + nodes_df = nodes_df.set_index('node_name') + node_data_dict = nodes_df.to_dict(orient='index') + networkx.set_node_attributes(nx_graph, node_data_dict) + + return nx_graph
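# A usage sketch (not in this patch): round-trip an RX-based Graph back to
# NetworkX and check that node data survives under the original NX node_ids.
# Here `graph` is assumed to be a Graph already converted from NX to RX, and
# "population" stands in for any node attribute it carries.
nx_graph = graph.to_networkx_graph()
for node_id in graph.node_indices:
    original_id = graph.original_nx_node_id_for_internal_node_id(node_id)
    assert nx_graph.nodes[original_id]["population"] == graph.node_data(node_id)["population"]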
# frm: TODO: Create a test for this routine - def original_node_ids_for_set(self, set_of_nodes): + def original_nx_node_ids_for_set(self, set_of_nodes): # Utility routine to quickly translate a set of node_ids to their original node_ids - _node_id_to_original_node_id_map = self._node_id_to_original_node_id_map - new_set = {_node_id_to_original_node_id_map[node_id] for node_id in set_of_nodes} + _node_id_to_original_nx_node_id_map = self._node_id_to_original_nx_node_id_map + new_set = {_node_id_to_original_nx_node_id_map[node_id] for node_id in set_of_nodes} return new_set # frm: TODO: Create a test for this routine - def original_node_ids_for_list(self, list_of_nodes): + def original_nx_node_ids_for_list(self, list_of_nodes): # Utility routine to quickly translate a list of node_ids to their original node_ids - _node_id_to_original_node_id_map = self._node_id_to_original_node_id_map - new_list = [_node_id_to_original_node_id_map[node_id] for node_id in set_of_nodes] + _node_id_to_original_nx_node_id_map = self._node_id_to_original_nx_node_id_map + new_list = [_node_id_to_original_nx_node_id_map[node_id] for node_id in list_of_nodes] return new_list - def original_node_id_for_internal_node_id(self, internal_node_id): - return self._node_id_to_original_node_id_map[internal_node_id] + def original_nx_node_id_for_internal_node_id(self, internal_node_id): + return self._node_id_to_original_nx_node_id_map[internal_node_id] # frm: TODO: Create a test for this routine - def internal_node_id_for_original_node_id(self, original_node_id): - # frm: TODO: Think about a better way to map original_node_ids to internal node_ids + def internal_node_id_for_original_nx_node_id(self, original_nx_node_id): + # frm: TODO: Think about a better way to map original_nx_node_ids to internal node_ids # # The problem is that when this routine is called, it may often be called repeatedly # for a list of nodes, and we create the reverse dict every time this is called which @@ -239,14 +328,28 @@ def internal_node_id_for_original_node_id(self, original_node_id): # reverse the map so we can go from original node_id to internal node_id original_node_id_to_internal_node_id_map = { - v: k for k,v in self._node_id_to_original_node_id_map.items() + v: k for k,v in self._node_id_to_original_nx_node_id_map.items() } - return orignal_node_id_to_internal_node_id_map[original_node_id] + return original_node_id_to_internal_node_id_map[original_nx_node_id] def verify_graph_is_valid(self): + # frm: TODO: Performance: Only run verify_graph_is_valid() in development. + # + # For now, in order to assess performance differences between NX and RX + # I will just return True... + return True + + # Sanity check - this is where to add additional sanity checks in the future. + # frm: TODO: Enhance this routine to do more... + + # frm: TODO: Performance: verify_graph_is_valid() is expensive - called a lot + # + # Come up with a way to run this in "debug mode" - that is, while in development/testing + # but not in production. It actually accounted for 5% of runtime... + # Checks that there is one and only one graph if not ( (self._nx_graph is not None and self._rx_graph is None) @@ -254,8 +357,16 @@ def verify_graph_is_valid(self): ): raise Exception("Graph.verify_graph_is_valid - graph not properly configured") + # frm: TODO: Performance: is_nx_graph() and is_rx_graph() are expensive. + # + # Not all of the calls on these routines are needed in production - some are just + # sanity checking. Find a way to NOT run this code when in production.
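# One way to act on the performance TODOs above (a sketch only - the
# GERRYCHAIN_DEBUG environment variable is hypothetical): gate the expensive
# validation behind a module-level flag so production chains skip it entirely.
import os

GRAPH_VALIDATION_ENABLED = os.environ.get("GERRYCHAIN_DEBUG", "0") == "1"

def verify_graph_is_valid(self):
    if not GRAPH_VALIDATION_ENABLED:
        return True  # production: skip the expensive checks
    # development/testing: run the full sanity checks
    ...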
def is_nx_graph(self): - self.verify_graph_is_valid() + # frm: TODO: Performance: Only run verify_graph_is_valid() in development + # + # Find a clever way to only run this code in development. Commenting it out for now... + # self.verify_graph_is_valid() return self._nx_graph is not None def get_nx_graph(self): @@ -269,7 +380,10 @@ def get_rx_graph(self): return self._rx_graph def is_rx_graph(self): - self.verify_graph_is_valid() + # frm: TODO: Performance: Only run verify_graph_is_valid() in development + # + # Find a clever way to only run this code in development. Commenting it out for now... + # self.verify_graph_is_valid() return self._rx_graph is not None def convert_from_nx_to_rx(self) -> "Graph": @@ -285,17 +399,47 @@ # self.verify_graph_is_valid() if self.is_nx_graph(): - rx_graph = rustworkx.networkx_converter(self._nx_graph, keep_attributes=True) + + if (self._is_a_subgraph): + # This routine is intended to be used in exactly one place - in converting + # an NX based Graph object to be RX based when creating a Partition object. + # In the future, it might become useful for other reasons, but until then, + # to guard against careless uses, the code will insist that it not be a subgraph. + + # frm: TODO: Add a comment about the intended use of this routine to its + # overview comment above. + raise Exception("convert_from_nx_to_rx: graph to be converted is a subgraph") + + nx_graph = self._nx_graph + rx_graph = rustworkx.networkx_converter(nx_graph, keep_attributes=True) + + # Note that the resulting RX graph will have multigraph set to False, which + # ensures that there is never more than one edge between two specific nodes. + # This is perhaps not all that interesting in general, but it is critical + # when getting the edge_id from an edge using the RX edge_indices_from_endpoints() + # routine - because it ensures that only a single edge_id is returned... converted_graph = Graph.from_rustworkx(rx_graph) + # Some graphs have geometry data (from a geodataframe), so preserve it if it exists + if hasattr(self, "geometry"): + converted_graph.geometry = self.geometry + # Create a mapping from the old NX node_ids to the new RX node_ids (created by # RX when it converts from NX) nx_to_rx_node_id_map = { converted_graph.node_data(node_id)["__networkx_node__"]: node_id for node_id in converted_graph._rx_graph.node_indices() } - converted_graph._nx_to_rx_node_id_map = nx_to_rx_node_id_map + converted_graph.nx_to_rx_node_id_map = nx_to_rx_node_id_map + + # We also have to update the _node_id_to_original_nx_node_id_map to refer to the node_ids + # in the NX Graph object.
+ _node_id_to_original_nx_node_id_map = {} + for node_id in converted_graph.nodes: + original_nx_node_id = converted_graph.node_data(node_id)["__networkx_node__"] + _node_id_to_original_nx_node_id_map[node_id] = original_nx_node_id + converted_graph._node_id_to_original_nx_node_id_map = _node_id_to_original_nx_node_id_map return converted_graph elif self.is_rx_graph(): @@ -308,7 +452,7 @@ def get_nx_to_rx_node_id_map(self): if not self.is_rx_graph(): raise Exception("get_nx_to_rx_node_id_map: Graph is not an RX based Graph") - return self._nx_to_rx_node_id_map + return self.nx_to_rx_node_id_map @classmethod def from_json(cls, json_file: str) -> "Graph": @@ -515,6 +659,7 @@ def from_geodataframe( # Generate dict of dicts of dicts with shared perimeters according # to the requested adjacency rule adjacencies = neighbors(df, adjacency) # Note - this is adjacency.neighbors() + # frm: TODO: Make it explicit that neighbors() above is adjacency.neighbors() # frm: Original Code: graph = cls(adjacencies) nx_graph = networkx.Graph(adjacencies) @@ -611,6 +756,15 @@ def lookup(self, node: Any, field: Any): return self.node_data(node, field) + # Performance Note: + # + # Most of the functions in the Graph class will be called after a + # partition has been created and the underlying graph converted + # to be based on RX. So, by testing first for RX we actually + # save a significant amount of time because we do not need to + # also test for NX (if you test for NX first then you do two tests). + # + @property def node_indices(self): self.verify_graph_is_valid() @@ -618,10 +772,10 @@ def node_indices(self): # frm: TODO: This does the same thing that graph.nodes does - returning a list of node_ids. # Do we really want to support two ways of doing the same thing? - if (self.is_nx_graph()): - return set(self._nx_graph.nodes) - elif (self.is_rx_graph()): + if (self.is_rx_graph()): return set(self._rx_graph.node_indices()) + elif (self.is_nx_graph()): + return set(self._nx_graph.nodes) else: raise Exception("Graph.node_indices - bad kind of graph object") @@ -629,12 +783,12 @@ def node_indices(self): def edge_indices(self): self.verify_graph_is_valid() - if (self.is_nx_graph()): - # A set of edge_ids (tuples) extracted from the graph's EdgeView - return set(self._nx_graph.edges) - elif (self.is_rx_graph()): + if (self.is_rx_graph()): # A set of edge_ids for the edges return set(self._rx_graph.edge_indices()) + elif (self.is_nx_graph()): + # A set of edge_ids (tuples) extracted from the graph's EdgeView + return set(self._nx_graph.edges) else: raise Exception("Graph.edges - bad kind of graph object") @@ -649,12 +803,17 @@ def get_edge_from_edge_id(self, edge_id): """ self.verify_graph_is_valid() - if (self.is_nx_graph()): + if (self.is_rx_graph()): + # In RX, we need to go get the edge tuple + # frm: TODO: Performance - use get_edge_endpoints_by_index() to get edge + # + # The original RX code (before October 27, 2025): + # return self._rx_graph.edge_list()[edge_id] + endpoints = self._rx_graph.get_edge_endpoints_by_index(edge_id) + return (endpoints[0], endpoints[1]) + elif (self.is_nx_graph()): # In NX, the edge_id is also the edge tuple return edge_id - elif (self.is_rx_graph()): - # In RX, we need to go get the edge tuple - return self._rx_graph.edge_list()[edge_id] else: raise Exception("Graph.get_edge_from_edge_id - bad kind of graph object") @@ -666,25 +825,18 @@ def get_edge_id_from_edge(self, edge): """ self.verify_graph_is_valid() - if (self.is_nx_graph()): + if (self.is_rx_graph()): + # Note that 
while in general edge_indices_from_endpoints() + # can return more than one edge in the case of a multigraph (a graph that + # allows more than one edge between two nodes), we can rely on it only + # returning a single edge here because the RX graph object has multigraph set + # to False by rustworkx.networkx_converter() - the source NX graph was not a multigraph... + # + edge_indices = self._rx_graph.edge_indices_from_endpoints(edge[0], edge[1]) + return edge_indices[0] # there will always be one and only one + elif (self.is_nx_graph()): # In NX, the edge_id is also the edge tuple return edge - elif (self.is_rx_graph()): - # In RX, we need to go get the edge_id from the edge tuple - # frm: TODO: Think about whether there is a better way to do this. I am - # worried that this might be expensive in terms of performance - # with large graphs. This is used in tree.py when seeing if a - # cut edge has a weight assigned to it. - # frm: Note that we sort both the edge_list and the edge, to canonicalize - # the edges so the smaller node_id is first. This allows us to not - # worry about whether the edge was (3,5) or (5,3) - - sorted_edge_list = sorted(list(self._rx_graph.edge_list())) - # frm: TODO: There has to be a more elegant way to do this... *sheesh* - sorted_edge = edge - if edge[0] > edge[1]: - sorted_edge = (edge[1], edge[0]) - return sorted_edge_list.index(sorted_edge) else: raise Exception("Graph.get_edge_id_from_edge - bad kind of graph object") @@ -692,12 +844,12 @@ def get_edge_id_from_edge(self, edge): def nodes(self): self.verify_graph_is_valid() - if (self.is_nx_graph()): - # A list of node_ids - - return list(self._nx_graph.nodes) - elif (self.is_rx_graph()): + if (self.is_rx_graph()): # A list of integer node_ids return list(self._rx_graph.node_indices()) + elif (self.is_nx_graph()): + # A list of node_ids - + return list(self._nx_graph.nodes) else: raise Exception("Graph.nodes - bad kind of graph object") @@ -757,39 +909,63 @@ def edges(self): self.verify_graph_is_valid() - if (self.is_nx_graph()): - # A set of tuples extracted from the graph's EdgeView - return set(self._nx_graph.edges) - elif (self.is_rx_graph()): + # frm: TODO: Think about whether edges() should return a set or a list. + # + # nodes() returns a list, so my first take is that edges() should do so + # as well (or maybe nodes() should return a set). Seems odd for them to return + # different types... + + if (self.is_rx_graph()): # A set of tuples for the edges return set(self._rx_graph.edge_list()) + elif (self.is_nx_graph()): + # A set of tuples extracted from the graph's EdgeView + return set(self._nx_graph.edges) else: raise Exception("Graph.edges - bad kind of graph object") def add_edge(self, node_id1, node_id2): + # frm: TODO: Think about whether this routine makes sense for RX based Graph objects + # + # The current paradigm is that the user will use NX to create and modify graphs, but + # then transition (automatically) to using RX once a Partition object is created.
+ # This paradigm would not allow modifying an RX graph, so maybe (until we transition + # to allowing folks to start with RX based Graph objects) we disallow this for RX + # based graphs + # + self.verify_graph_is_valid() - if (self.is_nx_graph()): - self._nx_graph.add_edge(node_id1, node_id2) - elif (self.is_rx_graph()): + if (self.is_rx_graph()): # empty dict tells RX the edge data will be a dict self._rx_graph.add_edge(node_id1, node_id2, {}) + elif (self.is_nx_graph()): + self._nx_graph.add_edge(node_id1, node_id2) else: raise Exception("Graph.add_edge - bad kind of graph object") def get_edge_tuple(self, edge_id): + + # frm: TODO: Delete this routine after verifying that nobody uses it. + # + # It appears that the gerrychain code does not use this. Need to + # verify that it was added by me (and hence is not part of any + # external legacy code), after which it should be deleted as it is + # semantically the same as get_edge_from_edge_id() + self.verify_graph_is_valid() - if (self.is_nx_graph()): + if (self.is_rx_graph()): + # frm: TODO: Performance: There is probably a more efficient way to do this + return self._rx_graph.edge_list()[edge_id] + elif (self.is_nx_graph()): # In NX, the edge_id is already a tuple with the two node_ids return edge_id - elif (self.is_rx_graph()): - return self._rx_graph.edge_list()[edge_id] else: raise Exception("Graph.get_edge_tuple - bad kind of graph object") def add_data( - self, df: pd.DataFrame, columns: Optional[Iterable[str]] = None + self, df: pd.DataFrame, columns: Optional[Iterable[str]] = None ) -> None: """ Add columns of a DataFrame to a graph as node attributes @@ -939,10 +1115,10 @@ def __getattr__(self, __name: str) -> Any: # If attribute doesn't exist on this object, try # its underlying graph object... - if (self.is_nx_graph()): - return object.__getattribute__(self._nx_graph, __name) - elif (self.is_rx_graph()): + if (self.is_rx_graph()): return object.__getattribute__(self._rx_graph, __name) + elif (self.is_nx_graph()): + return object.__getattribute__(self._nx_graph, __name) else: raise Exception("Graph.__getattr__ - bad kind of graph object") @@ -964,11 +1140,11 @@ def __getitem__(self, __name: str) -> Any: # self.verify_graph_is_valid() - if (self.is_nx_graph()): - return self._nx_graph[__name] - elif (self.is_rx_graph()): + if (self.is_rx_graph()): # frm TODO: raise Exception("Graph.__getitem__() NYI for RX") + elif (self.is_nx_graph()): + return self._nx_graph[__name] else: raise Exception("Graph.__getitem__() - bad kind of graph object") @@ -1027,7 +1203,7 @@ def subgraph(self, nodes: Iterable[Any]) -> "Graph": new_subgraph = self.from_networkx(nx_subgraph) # for NX, the node_ids in subgraph are the same as in the parent graph _node_id_to_parent_node_id_map = {node: node for node in nodes} - _node_id_to_original_node_id_map = {node: node for node in nodes} + _node_id_to_original_nx_node_id_map = {node: node for node in nodes} elif (self.is_rx_graph()): # frm TODO: Need to check logic below - not sure this works exactly correctly for RX... if isinstance(nodes, frozenset) or isinstance(nodes, set): @@ -1043,9 +1219,24 @@ self.node_data(node_id)["parent_node_id"] = node_id # frm: TODO: Since data is shared by nodes in subgraphs, perhaps we could just set - # the "original_node_id" in the beginning and rely on it forever... + # the "original_nx_node_id" in the beginning and rely on it forever...
+ # + # This actually gives me the heebie-jeebies - we have aliased data that we are overwriting. + # Either it is aliased data and we count on that and don't bother to update it, or else + # we ensure that it is NOT aliased data and then we can feel free to overwrite it and/or + # set it anew... + # + # In short, verify that data in subgraphs is shared (in RX) and then make sure + # that we do NOT need to set this value, because it is already set. + # + # A more general issue is whether it is OK for data in subgraphs to be shared. I think + # so because we do not store any temporary or context dependent information in node-data, + # but it would be nice to validate that (and ideally to test it). At the very least, + # if subgraph data (and in fact NX and RX node data) is shared, then that should be documented + # up the wazoo... + # for node_id in nodes: - self.node_data(node_id)["original_node_id"] = self._node_id_to_original_node_id_map[node_id] + self.node_data(node_id)["original_nx_node_id"] = self._node_id_to_original_nx_node_id_map[node_id] rx_subgraph = self._rx_graph.subgraph(nodes) new_subgraph = self.from_rustworkx(rx_subgraph) @@ -1055,21 +1246,25 @@ for subgraph_node_id in new_subgraph.node_indices: _node_id_to_parent_node_id_map[subgraph_node_id] = new_subgraph.node_data(subgraph_node_id)["parent_node_id"] # frm: Create the map from subgraph node_id to the original graph's node_id - _node_id_to_original_node_id_map = {} + _node_id_to_original_nx_node_id_map = {} for subgraph_node_id in new_subgraph.node_indices: - _node_id_to_original_node_id_map[subgraph_node_id] = new_subgraph.node_data(subgraph_node_id)["original_node_id"] + _node_id_to_original_nx_node_id_map[subgraph_node_id] = new_subgraph.node_data(subgraph_node_id)["original_nx_node_id"] else: raise Exception("Graph.subgraph - bad kind of graph object") new_subgraph._is_a_subgraph = True new_subgraph._node_id_to_parent_node_id_map = _node_id_to_parent_node_id_map - new_subgraph._node_id_to_original_node_id_map = _node_id_to_original_node_id_map + new_subgraph._node_id_to_original_nx_node_id_map = _node_id_to_original_nx_node_id_map return new_subgraph def translate_subgraph_node_ids_for_flips(self, flips): # flips is a dictionary mapping node_ids to parts (districts). + # This routine replaces the node_ids of the subgraph with the node_ids + # for the same node in the parent graph. It is used when a computation + # is made on a subgraph but the resulting flips being returned must use + # the node_ids of the parent graph. translated_flips = {} for subgraph_node_id, part in flips.items(): parent_node_id = self._node_id_to_parent_node_id_map[subgraph_node_id] @@ -1078,12 +1273,16 @@ def translate_subgraph_node_ids_for_flips(self, flips): return translated_flips
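# An illustrative sketch (hypothetical node_ids, not a test from the codebase):
# a computation on a subgraph produces flips keyed by subgraph node_ids, which
# must be translated back into parent node_ids before being applied.
subgraph = parent_graph.subgraph({10, 11, 12})  # RX renumbers these as 0, 1, 2
flips = {0: "district_A", 1: "district_B"}      # keyed by subgraph node_ids
parent_flips = subgraph.translate_subgraph_node_ids_for_flips(flips)
# parent_flips == {10: "district_A", 11: "district_B"}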
def translate_subgraph_node_ids_for_set_of_nodes(self, set_of_nodes): + # This routine replaces the node_ids of the subgraph with the node_ids + # for the same node in the parent graph. It is used when a computation + # is made on a subgraph but the resulting set of nodes being returned + # must use the node_ids of the parent graph. translated_set_of_nodes = set() for node_id in set_of_nodes: translated_set_of_nodes.add(self._node_id_to_parent_node_id_map[node_id]) return translated_set_of_nodes - def nx_generic_bfs_edges(self, source, neighbors=None, depth_limit=None): + def generic_bfs_edges(self, source, neighbors=None, depth_limit=None): # frm: Code copied from GitHub: # # https://github.com/networkx/networkx/blob/main/networkx/algorithms/traversal/breadth_first_search.py @@ -1187,7 +1386,7 @@ def generic_bfs_successors_generator(self, root_node_id): # the children of that node (list of node_ids). parent = root_node_id children = [] - for p, c in self.nx_generic_bfs_edges(root_node_id): + for p, c in self.generic_bfs_edges(root_node_id): # frm: parent-child pairs appear ordered by their parent, so # we can collect all of the children for a node by just # iterating through pairs until the parent changes. @@ -1209,7 +1408,7 @@ def generic_bfs_predecessors(self, root_node_id): # frm Note: We had to implement our own, because the built-in RX version only worked # for directed graphs. predecessors = [] - for s, t in self.nx_generic_bfs_edges(root_node_id): + for s, t in self.generic_bfs_edges(root_node_id): predecessors.append((t,s)) return dict(predecessors) @@ -1249,29 +1448,31 @@ def predecessors(self, root_node_id): All of this is interesting, but I have not yet spent the time to figure out why it matters in the code. - TODO: The code in NetworkX for bfs_successors() and bfs_predecessors() - works on undirected graphs (with cleverness to cut cycles), but - the same named routines in RX only operate on directed graphs, - so there is work to be done to make this functionality work - for RX... + TODO: Decide if it makes sense to have different implementations + for NX and RX. The code below has the original definition + from the pre-RX codebase, but the code for RX will work + for NX too - so I think that there is no good reason to + have different code for NX. Maybe no harm, but on the other + hand, it seems like a needless difference and hence more + complexity... """ self.verify_graph_is_valid() - if (self.is_nx_graph()): - return {a: b for a, b in networkx.bfs_predecessors(self._nx_graph, root_node_id)} - elif (self.is_rx_graph()): + if (self.is_rx_graph()): return self.generic_bfs_predecessors(root_node_id) + elif (self.is_nx_graph()): + return {a: b for a, b in networkx.bfs_predecessors(self._nx_graph, root_node_id)} else: raise Exception("Graph.predecessors - bad kind of graph object") def successors(self, root_node_id): self.verify_graph_is_valid() - if (self.is_nx_graph()): - return {a: b for a, b in networkx.bfs_successors(self._nx_graph, root_node_id)} - elif (self.is_rx_graph()): + if (self.is_rx_graph()): return self.generic_bfs_successors(root_node_id) + elif (self.is_nx_graph()): + return {a: b for a, b in networkx.bfs_successors(self._nx_graph, root_node_id)} else: raise Exception("Graph.successors - bad kind of graph object") @@ -1281,20 +1482,30 @@ def neighbors(self, node): # NX neighbors() returns an iterator over the node_ids of neighbor nodes # RX neighbors() returns a NodeIndices object with the list of node_ids of neighbor nodes # However, the code outside graph.py only ever iterates over all neighbors so returning a list works...
- if (self.is_nx_graph()): + if (self.is_rx_graph()): + # frm: TODO: Performance: Do not convert to a list + # + # The RX path (this path) is much more expensive than the NX path, and + # I am assuming that it is the conversion to a list in the original code + # that is expensive, so I am going to just not convert it to a list + # since the return type of rx.neighbors() is a NodeIndices object which + # is already essentially a Python list - in that it implements the Python sequence protocol + # + # Original code: + # return list(self._rx_graph.neighbors(node)) + return self._rx_graph.neighbors(node) + elif (self.is_nx_graph()): return list(self._nx_graph.neighbors(node)) - elif (self.is_rx_graph()): - return list(self._rx_graph.neighbors(node)) else: raise Exception("Graph.neighbors - bad kind of graph object") def degree(self, node: Any) -> int: self.verify_graph_is_valid() - if (self.is_nx_graph()): - return self._nx_graph.degree(node) - elif (self.is_rx_graph()): + if (self.is_rx_graph()): return self._rx_graph.degree(node) + elif (self.is_nx_graph()): + return self._nx_graph.degree(node) else: raise Exception("Graph.degree - bad kind of graph object") @@ -1303,10 +1514,10 @@ def node_data(self, node_id): self.verify_graph_is_valid() - if (self.is_nx_graph()): - data_dict = self._nx_graph.nodes[node_id] - elif (self.is_rx_graph()): + if (self.is_rx_graph()): data_dict = self._rx_graph[node_id] + elif (self.is_nx_graph()): + data_dict = self._nx_graph.nodes[node_id] else: raise Exception("Graph.node_data - bad kind of graph object") @@ -1329,15 +1540,19 @@ def edge_data(self, edge_id): self.verify_graph_is_valid() - if (self.is_nx_graph()): + if (self.is_rx_graph()): + # frm: TODO: Performance: use get_edge_data_by_index() + # + # Original code (pre October 2025) that indexed into all edges => slow + # data_dict = self._rx_graph.edges()[edge_id] + data_dict = self._rx_graph.get_edge_data_by_index(edge_id) + elif (self.is_nx_graph()): data_dict = self._nx_graph.edges[edge_id] - elif (self.is_rx_graph()): - data_dict = self._rx_graph.edges()[edge_id] else: - raise Exception("Graph.node_data - bad kind of graph object") + raise Exception("Graph.edge_data - bad kind of graph object") if not isinstance(data_dict, dict): - raise Exception("node data is not a dictionary"); + raise Exception("edge data is not a dictionary"); return data_dict @@ -1359,32 +1574,104 @@ def laplacian_matrix(self): # processing, I don't think this matters, but I should # check to be 100% certain. - if self.is_nx_graph(): - nx_graph = self._nx_graph - laplacian_matrix = networkx.laplacian_matrix(nx_graph) - elif self.is_rx_graph(): + if self.is_rx_graph(): rx_graph = self._rx_graph # 1. Get the adjacency matrix adj_matrix = rustworkx.adjacency_matrix(rx_graph) # 2. Calculate the degree matrix (simplified for this example) - degree_matrix = np.diag([rx_graph.degree(node) for node in rx_graph.node_indices()]) + degree_matrix = numpy.diag([rx_graph.degree(node) for node in rx_graph.node_indices()]) # 3. Calculate the Laplacian matrix np_laplacian_matrix = degree_matrix - adj_matrix - # 4. Convert the NumPy array to a csr_array - laplacian_matrix = csr_array(np_laplacian_matrix) + # 4. 
Convert the NumPy array to a scipy.sparse array + laplacian_matrix = scipy.sparse.csr_array(np_laplacian_matrix) + elif self.is_nx_graph(): + nx_graph = self._nx_graph + laplacian_matrix = networkx.laplacian_matrix(nx_graph) else: raise Exception("laplacian_matrix: badly configured graph parameter") return laplacian_matrix def normalized_laplacian_matrix(self): + + def create_scipy_sparse_array_from_rx_graph(rx_graph): + num_nodes = rx_graph.num_nodes() + + rows = [] + cols = [] + data = [] + + # The graph is undirected, so each edge contributes both (u, v) and (v, u) + # entries - keeping the adjacency matrix symmetric, as + # networkx.to_scipy_sparse_array() does for undirected graphs. + for u, v in rx_graph.edge_list(): + rows.extend([u, v]) + cols.extend([v, u]) + data.extend([1, 1]) # simple adjacency matrix, so just 1s, no weight attribute + + sparse_array = scipy.sparse.coo_matrix((data, (rows,cols)), shape=(num_nodes, num_nodes)) + + return sparse_array + + if self.is_rx_graph(): + rx_graph = self._rx_graph + """ + The following is code copied from the networkx linalg file, laplacianmatrix.py + for normalized_laplacian_matrix. Below this code has been modified to work for + gerrychain (with an RX-based Graph object) + + import numpy as np + import scipy as sp + + if nodelist is None: + nodelist = list(G) + A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, format="csr") + n, _ = A.shape + diags = A.sum(axis=1) + D = sp.sparse.dia_array((diags, 0), shape=(n, n)).tocsr() + L = D - A + with np.errstate(divide="ignore"): + diags_sqrt = 1.0 / np.sqrt(diags) + diags_sqrt[np.isinf(diags_sqrt)] = 0 + DH = sp.sparse.dia_array((diags_sqrt, 0), shape=(n, n)).tocsr() + return DH @ (L @ DH) + + """ + + # frm: TODO: Get someone to validate that this in fact does the right thing. + # + # The one test, test_proposal_returns_a_partition[spectral_recom], in test_proposals.py + # that uses normalized_laplacian_matrix() now passes, but it is for a small 6x6 graph + # and hence is not a real world test... + # + # I have left debugging print statements (commented out) in case someone in the future + # (probably me) wants to verify that the right things are happening...
+ + # Original NetworkX Code: + # A = networkx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, format="csr") + # + A = create_scipy_sparse_array_from_rx_graph(rx_graph) + n, _ = A.shape # shape() => dimensions of the array (rows, cols), so n = num_rows + # print("") + # print(f"normalized_laplacian_matrix: num_rows = {n}") + # print(f"normalized_laplacian_matrix: sparse_array is: {A}") + diags = A.sum(axis=1) # sum of values in each row => column vector + diags = diags.T # convert to a row vector / 1D array + # print(f"normalized_laplacian_matrix: diags is: \n{diags}") + D = scipy.sparse.dia_array((diags, [0]), shape=(n, n)).tocsr() + # print(f"normalized_laplacian_matrix: D is: \n{D}") + L = D - A + # print(f"normalized_laplacian_matrix: L is: \n{L}") + with numpy.errstate(divide="ignore"): + diags_sqrt = 1.0 / numpy.sqrt(diags) + diags_sqrt[numpy.isinf(diags_sqrt)] = 0 + # print(f"normalized_laplacian_matrix: diags_sqrt is: \n{diags_sqrt}") + DH = scipy.sparse.dia_array((diags_sqrt, 0), shape=(n, n)).tocsr() + # print(f"normalized_laplacian_matrix: DH is: \n{DH}") + normalized_laplacian = DH @ (L @ DH) + # print(f"normalized_laplacian_matrix: normalized_laplacian is: \n{normalized_laplacian}") + return normalized_laplacian + + elif self.is_nx_graph(): nx_graph = self._nx_graph laplacian_matrix = networkx.normalized_laplacian_matrix(nx_graph) - elif self.is_rx_graph(): - # frm: TODO: Implement normalized_laplacian_matrix() for RX - rx_graph = self._rx_graph - raise Exception("normalized_laplacian_matrix NYI for RustworkX based Graph objects") else: raise Exception("normalized_laplacian_matrix: badly configured graph parameter") @@ -1395,33 +1682,60 @@ def subgraphs_for_connected_components(self): # # This mirrors the nx.connected_components() routine in NetworkX - if self.is_nx_graph(): - nx_graph = self.get_nx_graph() - subgraphs = [ - self.subgraph(nodes) for nodes in networkx.connected_components(nx_graph) - ] - elif self.is_rx_graph(): + if self.is_rx_graph(): rx_graph = self.get_rx_graph() subgraphs = [ self.subgraph(nodes) for nodes in rustworkx.connected_components(rx_graph) ] + elif self.is_nx_graph(): + nx_graph = self.get_nx_graph() + subgraphs = [ + self.subgraph(nodes) for nodes in networkx.connected_components(nx_graph) + ] else: raise Exception("subgraphs_for_connected_components: Bad kind of Graph") return subgraphs def num_connected_components(self): - if self.is_nx_graph(): - nx_graph = self.get_nx_graph() - connected_components = list(networkx.connected_components(nx_graph)) - elif self.is_rx_graph(): + if self.is_rx_graph(): rx_graph = self.get_rx_graph() connected_components = rustworkx.connected_components(rx_graph) + elif self.is_nx_graph(): + nx_graph = self.get_nx_graph() + connected_components = list(networkx.connected_components(nx_graph)) else: raise Exception("num_connected_components: Bad kind of Graph") num_cc = len(connected_components) return num_cc + + def is_a_tree(self): + if self.is_rx_graph(): + rx_graph = self.get_rx_graph() + num_nodes = rx_graph.num_nodes() + num_edges = rx_graph.num_edges() + + # Condition 1: Check if the number of edges is one less than the number of nodes + if num_edges != num_nodes - 1: + return False + + # Condition 2: Check for connectivity (and implicitly, acyclicity if E = V-1) + # A graph with V-1 edges and no cycles must be connected. + # A graph with V-1 edges and connected must be acyclic. + + # We can check connectivity by ensuring there's only one connected component. 
+ connected_components = rustworkx.connected_components(rx_graph) + if len(connected_components) != 1: + return False + + return True + elif self.is_nx_graph(): + nx_graph = self.get_nx_graph() + return networkx.is_tree(nx_graph) + else: + raise Exception("is_a_tree: Bad kind of Graph") + ###################################################### class OriginalGraph(networkx.Graph): diff --git a/gerrychain/grid.py b/gerrychain/grid.py index c2455d64..52057f6a 100644 --- a/gerrychain/grid.py +++ b/gerrychain/grid.py @@ -15,16 +15,14 @@ import math import networkx -# frm TODO: Decide whether to leave grid.py as-is, at least for now. - # While it imports NetworkX, it eventually creates a new - # Graph object which is added to a Partition which will - # eventually "freeze" and convert the new Graph object to - # be based on RX (under the covers). - # - # So, this can be thought of as legacy code that works just - # fine. In the future if we want to go full RX everywhere - # we can decide what to do. +# frm TODO: Clarify what purpose grid.py serves. # +# It is a convenience module to help users create toy graphs. It leverages +# NX to create graphs, but it returns new Graph objects. So, legacy user +# code will need to be at least reviewed to make sure that it properly +# copes with new Graph objects. +# + from gerrychain.partition import Partition from gerrychain.graph import Graph from gerrychain.updaters import ( diff --git a/gerrychain/metagraph.py b/gerrychain/metagraph.py index 100131b2..9a1fe928 100644 --- a/gerrychain/metagraph.py +++ b/gerrychain/metagraph.py @@ -27,6 +27,11 @@ def all_cut_edge_flips(partition: Partition) -> Iterator[Dict]: :returns: An iterator that yields dictionaries representing the flipped edges. :rtype: Iterator[Dict] """ + # frm: TODO: Add some documentation so a future reader of this code + # will not be as confused as I was... + + # frm: TODO: Why is this an iterator instead of just a dict? + # frm: For my own edification... It took me a while to understand why # this routine made sense at a high level. It finds all edges # on the boundary of districts - those that are "cut edges" diff --git a/gerrychain/metrics/partisan.py b/gerrychain/metrics/partisan.py index 0be22991..6fc9e9ff 100644 --- a/gerrychain/metrics/partisan.py +++ b/gerrychain/metrics/partisan.py @@ -15,7 +15,7 @@ def mean_median(election_results) -> float: """ Computes the Mean-Median score for the given ElectionResults. A positive value indicates an advantage for the first party listed - in the Election's parties_to_columns dictionary. + in the Election's party_names_to_node_attribute_names dictionary. :param election_results: An ElectionResults object :type election_results: ElectionResults @@ -33,7 +33,7 @@ def mean_thirdian(election_results) -> float: """ Computes the Mean-Median score for the given ElectionResults. A positive value indicates an advantage for the first party listed - in the Election's parties_to_columns dictionary. + in the Election's party_names_to_node_attribute_names dictionary. The motivation for this score is that the minority party in many states struggles to win even a third of the seats. @@ -57,7 +57,7 @@ def efficiency_gap(election_results) -> float: """ Computes the efficiency gap for the given ElectionResults. A positive value indicates an advantage for the first party listed - in the Election's parties_to_columns dictionary. + in the Election's party_names_to_node_attribute_names dictionary.
:param election_results: An ElectionResults object :type election_results: ElectionResults diff --git a/gerrychain/partition/partition.py b/gerrychain/partition/partition.py index cbb933d7..0d48412b 100644 --- a/gerrychain/partition/partition.py +++ b/gerrychain/partition/partition.py @@ -159,16 +159,16 @@ def _first_time(self, graph, assignment, updaters, use_default_updaters): # a RustworkX graph, and make sure it is also a FrozenGraph. Both # of these are important for performance. - # frm: TODO: Do we want to continue to allow users to create a Partition - # directly from an NX graph? I suppose there is no harm... + # Note that we automatically convert NetworkX based graphs to use RustworkX + # when we create a Partition object. # - # The answer is YES - creating and manipulating NX Graphs is easy and users + # Creating and manipulating NX Graphs is easy and users # are familiar with doing so. It makes sense to preserve the use case of # creating an NX-Graph and then allowing the code to under-the-covers # convert to RX - both for legacy compatibility, but also because NX provides # a really nice and easy way to create graphs. # - # So this TODO should be interpreted as a todo-item to update the documentation + # TODO: update the documentation # to describe the use case of creating a graph using NX. That documentation # should also describe how to post-process results of a MarkovChain run # but I haven't figured that out yet... @@ -179,8 +179,15 @@ def _first_time(self, graph, assignment, updaters, use_default_updaters): # if a Graph object, make sure it is based on an embedded RustworkX.PyGraph if isinstance(graph, Graph): - if (graph.is_nx_graph()): + # frm: TODO: Remove this short-term hack to do performance testing + test_performance_using_NX_graph = False + if (graph.is_nx_graph()) and test_performance_using_NX_graph: + self.assignment = get_assignment(assignment, graph) + print("Performance-Test: using NetworkX for Partition object") + + elif (graph.is_nx_graph()): + print("Partition: converting NX to RX") # Get the assignment that would be appropriate for the NX-based graph old_nx_assignment = get_assignment(assignment, graph) @@ -195,14 +202,6 @@ def _first_time(self, graph, assignment, updaters, use_default_updaters): ) self.assignment = new_rx_assignment - # We also have to update the _node_id_to_original_node_id_map to refer to the node_ids - # in the NX Graph object. - _node_id_to_original_node_id_map = {} - for node_id in graph.nodes: - original_node_id = graph.node_data(node_id)["__networkx_node__"] - _node_id_to_original_node_id_map[node_id] = original_node_id - graph._node_id_to_original_node_id_map = _node_id_to_original_node_id_map - else: self.assignment = get_assignment(assignment, graph) @@ -271,7 +270,7 @@ def __repr__(self): def __len__(self): return len(self.parts) - def flip(self, flips: Dict, use_original_node_ids=False) -> "Partition": + def flip(self, flips: Dict, use_original_nx_node_ids=False) -> "Partition": """ Returns the new partition obtained by performing the given `flips` on this partition. @@ -281,7 +280,7 @@ def flip(self, flips: Dict, use_original_node_ids=False) -> "Partition": :rtype: Partition """ - # frm: TODO: Change comments above to document new optional parameter, use_original_node_ids. + # frm: TODO: Change comments above to document new optional parameter, use_original_nx_node_ids. # # This is a new issue that arises from the fact that node_ids in RX are different from those # in the original NX graph. 
In the pre-RX code, we did not need to distinguish between @@ -296,10 +295,10 @@ def flip(self, flips: Dict, use_original_node_ids=False) -> "Partition": # Note that original node_ids in flips are typically used in tests # - if use_original_node_ids: + if use_original_nx_node_ids: new_flips = {} - for original_node_id, part in flips.items(): - internal_node_id = self.graph.internal_node_id_for_original_node_id(original_node_id) + for original_nx_node_id, part in flips.items(): + internal_node_id = self.graph.internal_node_id_for_original_nx_node_id(original_nx_node_id) new_flips[internal_node_id] = part flips = new_flips @@ -399,9 +398,10 @@ def plot(self, geometries=None, **kwargs): import geopandas if geometries is None: - geometries = self.graph.geometry - # frm: TODO: Test that self.graph.geometry is not None - but first need to grok - # where this is set (other than Graph.from_geodataframe()) + if hasattr(self.graph, "geometry"): + geometries = self.graph.geometry + else: + raise Exception("Partition.plot: graph has no geometry data") if set(geometries.index) != set(self.graph.nodes): raise TypeError( diff --git a/gerrychain/partition/subgraphs.py b/gerrychain/partition/subgraphs.py index e0b5a0ae..3062267e 100644 --- a/gerrychain/partition/subgraphs.py +++ b/gerrychain/partition/subgraphs.py @@ -1,15 +1,6 @@ from typing import List, Any, Tuple from ..graph import Graph -# frm: ???: TODO: Is this ever actually used by any other code? If so, where and for what? -# YES - it is used as the type of Partition.subgraphs. So, I need to find -# all access to partition.subgraphs and see how it is used. It is also -# used in contiguity.py -# -# This may be an opportunity to encapsulate knowldege of node_indices vs. -# node_names... - - class SubgraphView: """ A view for accessing subgraphs of :class:`Graph` objects. diff --git a/gerrychain/proposals/proposals.py b/gerrychain/proposals/proposals.py index 93c2df10..005591ea 100644 --- a/gerrychain/proposals/proposals.py +++ b/gerrychain/proposals/proposals.py @@ -111,10 +111,8 @@ def slow_reversible_propose_bi(partition: Partition) -> Partition: :rtype: Partition """ - # frm: TODO: Rename x to be edge *sigh*... - - b_nodes = {x[0] for x in partition["cut_edges"]}.union( - {x[1] for x in partition["cut_edges"]} + b_nodes = {edge[0] for edge in partition["cut_edges"]}.union( + {edge[1] for edge in partition["cut_edges"]} ) flip = random.choice(list(b_nodes)) diff --git a/gerrychain/proposals/spectral_proposals.py b/gerrychain/proposals/spectral_proposals.py index 6c687f36..ffc1c5e3 100644 --- a/gerrychain/proposals/spectral_proposals.py +++ b/gerrychain/proposals/spectral_proposals.py @@ -33,16 +33,6 @@ def spectral_cut( # the return value's node_ids need to be translated back into the appropriate # parent node_ids. - # frm: TODO: Subtle issue here - in NX there is no difference between a node ID - # and a node index (what you use to get a node from a list), but - # in RX there is a difference - which manifests most in subgraphs - # where RX goes ahead and renumbers the nodes in the graph. To - # make subgraphs work, we remember (in a map) what the node "IDs" - # of the parent graph were. - # - # The issue here is what the code wants here. We are in an RX - # world at this point - so maybe it doesn't matter, but worth - # thinking about... node_list = list(subgraph.nodes) num_nodes = len(node_list) @@ -51,13 +41,6 @@ def spectral_cut( for edge_id in subgraph.edge_indices: subgraph.edge_data(edge_id)["weight"] = random.random() - # frm TODO: NX vs. 
RX Issue: NYI: normalized_laplacian_matrix() for RX - # - # Note that while the standard laplacian is straight forward mathematically - # the normalized laplacian is a good bit more complicated. However, since - # NetworkX is open source - perhaps we can get permission to just use their - # code to create RX versions... - # Compute the desired laplacian matrix (convert from sparse to dense) if lap_type == "normalized": laplacian_matrix = (subgraph.normalized_laplacian_matrix()).todense() diff --git a/gerrychain/proposals/tree_proposals.py b/gerrychain/proposals/tree_proposals.py index 71c68b4f..02a59043 100644 --- a/gerrychain/proposals/tree_proposals.py +++ b/gerrychain/proposals/tree_proposals.py @@ -200,8 +200,6 @@ def dist_pair_edges(part, a, b): # frm: Find all edges that cross from district a into district b return set( e - # frm: TODO: edges vs. edge_ids: edges are wanted here (tuples) - # frm: Original Code: for e in part.graph.edges for e in part.graph.edges if ( ( @@ -305,8 +303,6 @@ def bounded_balance_edge_fn(*args, **kwargs): # the subgraph's node_ids afterwards. # - # frm: TODO: Clean up the code below - I munged it for debugging ... - # frm: Original Code: # num_possible_districts, nodes = bipartition_tree_random_reversible( # partition.graph.subgraph(subgraph_nodes), @@ -322,7 +318,7 @@ def bounded_balance_edge_fn(*args, **kwargs): num_possible_districts, nodes = result remaining_nodes = subgraph_nodes - set(nodes) - # frm: Notes to Self: the ** operator below merges the two dicts into a single dict. + # Note: the ** operator below merges the two dicts into a single dict. flips = { **{node: parts_to_merge[0] for node in nodes}, **{node: parts_to_merge[1] for node in remaining_nodes}, diff --git a/gerrychain/tree.py b/gerrychain/tree.py index 2732f0c5..3d55837e 100644 --- a/gerrychain/tree.py +++ b/gerrychain/tree.py @@ -14,7 +14,7 @@ contraction or memoization techniques. - A suite of functions (`bipartition_tree`, `recursive_tree_part`, `_get_seed_chunks`, etc.) for partitioning graphs into balanced subsets based on population targets and tolerances. -- Utility functions like `get_max_prime_factor_less_than` and `recursive_seed_part_inner` +- Utility functions like `get_max_prime_factor_less_than` and `_recursive_seed_part_inner` to assist in complex partitioning tasks. Dependencies: @@ -122,7 +122,7 @@ def random_spanning_tree( :returns: The maximal spanning tree represented as a Networkx Graph. :rtype: nx.Graph """ - # frm: ???: + # frm: TODO: Performance # This seems to me to be an expensive way to build a random spanning # tree. It calls a routine to compute a "minimal" spanning tree that # computes the total "weight" of the spanning tree and selects the @@ -190,19 +190,41 @@ def random_spanning_tree( # # since this doesn't reuse the reference. - - - - if region_surcharge is None: region_surcharge = dict() # frm: Original Code: for edge in graph.edges(): # Changed because in RX edge_ids are integers while edges are tuples + # Add a random weight to each edge in the graph with the goal of + # causing the selection of a different (random) spanning tree based + # on those weights. + # + # If a region_surcharge was passed in, then we want to add additional + # weight to edges that cross regions or that have a node that is + # not in any region. 
For example, if we want to keep municipalities + # together in the same district, the region_surcharge would contain + # an additional weight associated with the key for municipalities (say + # "muni") and if an edge went from one municipality to another or if + # either of the nodes in the edge were not in a municipality, then + # the edge would be given the additional weight (value) associated + # with the region_surcharge. This would bias the + # spanning_tree algorithm to select other edges... which would have + # the effect of prioritizing keeping regions intact. + + # frm: TODO: Verify that the comment above about region_surcharge is accurate + for edge_id in graph.edge_indices: edge = graph.get_edge_from_edge_id(edge_id) weight = random.random() + + # If there are any entries in the region_surcharge dict, then add + # additional weight to the edge for 1) edges that cross region boundaries (one + # node is in one region and the other node is in a different region) and 2) edges + # where one (or both) of the nodes is not in a region for key, value in region_surcharge.items(): # We surcharge edges that cross regions and those that are not in any region if ( @@ -218,7 +240,13 @@ # frm: Original Code: graph.edges[edge]["random_weight"] = weight graph.edge_data(edge_id)["random_weight"] = weight - # frm: CROCK: (for the moment) + # frm: TODO: Think about (and at least document) the fact that edge_data (and node_data) + # is shared by all partitions. So, as we process a chain of partitions, we are + # accessing the same underlying graph, and if we muck with edge_data and node_data + # then we are changing that data for all partitions. Stated differently, + # edge_data and node_data should be considered temporary and not persistent... + + # frm: TODO: CROCK: (for the moment) # We need to create a minimum spanning tree but the way to do so # is different for NX and RX. I am sure that there is a more elegant # way to do this, and in any event, this dependence on NX vs RX @@ -229,6 +257,16 @@ # frm: TODO: Remove NX / RX dependency - maybe move to graph.py + # frm: TODO: Think a bit about original_nx_node_ids + # + # Original node_ids refer to the node_ids used when a graph was created. + # This mostly means remembering the NX node_ids when you create an RX + # based Graph object. In the code below, we create an RX based Graph + # object, but we do not do anything to map original node_ids. This is + # probably OK, but it depends on how the spanning tree is used elsewhere. + # + # In short, worth some thought... + if (graph.is_nx_graph()): nx_graph = graph.get_nx_graph() spanning_tree = nxtree.minimum_spanning_tree( @@ -541,7 +579,6 @@ def find_balanced_edge_cuts_contraction( Cut( edge=e, # frm: Original Code: weight=h.graph.edges[e].get("random_weight", random.random()), weight=h.graph.edge_data( h.graph.get_edge_id_from_edge(e) ).get("random_weight", random.random()), @@ -791,6 +828,7 @@ def find_balanced_edge_cuts_memoization( ): e = (node, pred[node]) wt = random.random() + # frm: TODO: Performance: Think if code below can be made faster... cuts.append( Cut( edge=e, @@ -988,7 +1026,7 @@ def _region_preferred_max_weight_choice( return _max_weight_choice(cut_edge_list)
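# A hedged usage sketch of the region_surcharge mechanism described above. The
# node attribute "muni" and the surcharge value are invented for illustration;
# random_spanning_tree() itself is the routine defined in this file, and `graph`
# is assumed to be an existing Graph object.
from gerrychain.tree import random_spanning_tree

# Edges that cross municipality lines (or touch a node with no municipality)
# receive +1.0 of weight, so the minimum spanning tree tends to avoid them -
# biasing later cuts toward keeping municipalities intact.
tree = random_spanning_tree(graph, region_surcharge={"muni": 1.0})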
def bipartition_tree(
+# frm TODO: def bipartition_tree(
#
# This might get complicated depending on what kinds of functions
# are used as parameters. That is, do the functions used as parameters
@@ -1108,6 +1146,16 @@ def bipartition_tree(
     # a function argument's signature? What if someone refactors the code to have
     # different names??? *sigh*
     #
+    # A better strategy would be to lock in the function signature for ALL spanning_tree
+    # functions and then just have the region_surcharge parameter not be used in some of them...
+    #
+    # Same with "one_sided_cut"
+    #
+    # Oh - and change "one_sided_cut" to something a little more intuitive. I have to
+    # reset my mind every time I see it to figure out whether it means to split into
+    # two districts or just peel off one district... *sigh* Before doing this, check to
+    # see if "one_sided_cut" is a term of art that might make sense to some set of experts...
+    #
     if "region_surcharge" in signature(spanning_tree_fn).parameters:
         spanning_tree_fn = partial(spanning_tree_fn, region_surcharge=region_surcharge)

@@ -1135,6 +1183,32 @@ def bipartition_tree(
     # frm: ???: TODO: Again - we should NOT be changing semantics based
     #                 on the names in signatures...

+    # A better approach is to have all of the possible parameters exist
+    # in ALL of the versions of the cut_choice() functions and to
+    # have them default to None if not used by one of the functions.
+    # Then this code could just pass the values to the
+    # cut_choice function, and it could make sense of what to do.
+    #
+    # This makes the overall and comprehensive purpose of cut_choice
+    # functions clear. It centralizes the knowledge
+    # of what a cut_choice() function is supposed to do - or at least
+    # it prompts the programmer to document that a param in the
+    # general scheme does not apply in a given instance.
+    #
+    # I realize that this is perhaps not "pythonic" - in that it
+    # forces the programmer to document overall behavior instead
+    # of just finding a convenient way to sneak in something new.
+    # However, when code gets complicated, sneaky/clever code
+    # is just not worth it - better to have each change be a little
+    # more painful (needing to change the function signature for
+    # all instances of a generic function to add new functionality
+    # that is only needed by one new instance). This provides
+    # a natural place (in the comments of the generic function instances)
+    # to describe what is going on - and it alerts programmers
+    # that a given generic function has perhaps many different
+    # instances - but that they all share the same high-level
+    # responsibility.
+
     is_region_cut = (
         "region_surcharge" in signature(cut_choice).parameters
         and "populated_graph" in signature(cut_choice).parameters
@@ -1921,7 +1995,7 @@ def _get_seed_chunks(
 # But maybe this is intended to be used externally...
 def get_max_prime_factor_less_than(n: int, ceil: int) -> Optional[int]:
     """
-    Helper function for recursive_seed_part_inner. Returns the largest prime factor of ``n``
+    Helper function for _recursive_seed_part_inner. Returns the largest prime factor of ``n``
     less than ``ceil``, or None if all are greater than ceil.

     :param n: The number to find the largest prime factor for.
@@ -1954,10 +2028,7 @@ def get_max_prime_factor_less_than(n: int, ceil: int) -> Optional[int]:
     return largest_factor


-# frm: only used in this file
-# But maybe this is intended to be used externally...
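# ---------------------------------------------------------------------
# Editor's note: a sketch of the "lock in one signature" idea argued for in
# the comments above (hypothetical names, not the current GerryChain API).
# Every cut_choice variant accepts the same keyword parameters, defaulting
# to None, so the caller can pass everything unconditionally instead of
# inspecting signatures.
# ---------------------------------------------------------------------

import random
from typing import Any, Optional, Sequence

def uniform_cut_choice(cut_edge_list: Sequence[Any],
                       region_surcharge: Optional[dict] = None,
                       populated_graph: Optional[Any] = None) -> Any:
    # Ignores region_surcharge and populated_graph by design.
    return random.choice(list(cut_edge_list))

def region_aware_cut_choice(cut_edge_list: Sequence[Any],
                            region_surcharge: Optional[dict] = None,
                            populated_graph: Optional[Any] = None) -> Any:
    # A real region-aware variant would consult region_surcharge and
    # populated_graph here; this stub just falls back to a random choice.
    return random.choice(list(cut_edge_list))

def choose_cut(cut_choice, cut_edge_list, region_surcharge, populated_graph):
    # No signature() inspection needed: every cut_choice takes the same args.
    return cut_choice(
        cut_edge_list,
        region_surcharge=region_surcharge,
        populated_graph=populated_graph,
    )

print(choose_cut(uniform_cut_choice, [(0, 1), (1, 2)], None, None))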
-# frm TODO: Peter says this is only ever used internally, so we can add underscore to the name -def recursive_seed_part_inner( +def _recursive_seed_part_inner( graph: Graph, # frm: Original code: graph: nx.Graph, num_dists: int, pop_target: Union[float, int], @@ -2129,7 +2200,7 @@ def recursive_seed_part_inner( remaining_nodes -= nodes # frm: Create a list with the set of nodes returned by method() and then recurse # to get the rest of the sets of nodes for remaining districts. - assignment = [nodes] + recursive_seed_part_inner( + assignment = [nodes] + _recursive_seed_part_inner( graph.subgraph(remaining_nodes), num_dists - 1, pop_target, @@ -2155,7 +2226,7 @@ def recursive_seed_part_inner( assignment = [] for chunk in chunks: - chunk_assignment = recursive_seed_part_inner( + chunk_assignment = _recursive_seed_part_inner( graph.subgraph(chunk), num_dists // num_chunks, # new target number of districts pop_target, @@ -2170,7 +2241,7 @@ def recursive_seed_part_inner( # frm: From the logic above, this should never happen, but if it did # because of a future edit (bug), at least this will catch it # early before really bizarre things happen... - raise Exception("recursive_seed_part_inner(): Should never happen...") + raise Exception("_recursive_seed_part_inner(): Should never happen...") # The assignment object that has been created needs to have its # node_ids translated into parent_node_ids @@ -2186,7 +2257,7 @@ def recursive_seed_part_inner( -# frm ???: This routine is never called - not in this file and not in any other GerryChain file. +# frm TODO: ???: This routine is never called - not in this file and not in any other GerryChain file. # Is it intended to be used by end-users? And if so, for what purpose? def recursive_seed_part( graph: Graph, # frm: Original code: graph: nx.Graph, @@ -2201,7 +2272,7 @@ def recursive_seed_part( ) -> Dict: """ Returns a partition with ``num_dists`` districts balanced within ``epsilon`` of - ``pop_target`` by recursively splitting graph using recursive_seed_part_inner. + ``pop_target`` by recursively splitting graph using _recursive_seed_part_inner. :param graph: The graph :type graph: nx.Graph @@ -2237,7 +2308,7 @@ def recursive_seed_part( """ # frm: Note: It is not strictly necessary to use a subgraph in the call below on - # recursive_seed_part_inner(), because the top-level graph has + # _recursive_seed_part_inner(), because the top-level graph has # a _node_id_to_parent_node_id_map that just maps node_ids to themselves. However, # it seemed a good practice to ALWAYS call routines that are intended # to deal with subgraphs, to use a subgraph even when not strictly @@ -2251,7 +2322,7 @@ def recursive_seed_part( # In short - an agrument based on invariants being a good thing... # flips = {} - assignment = recursive_seed_part_inner( + assignment = _recursive_seed_part_inner( graph.subgraph(graph.node_indices), len(parts), pop_target, diff --git a/gerrychain/updaters/compactness.py b/gerrychain/updaters/compactness.py index aee6c887..418fe27f 100644 --- a/gerrychain/updaters/compactness.py +++ b/gerrychain/updaters/compactness.py @@ -21,6 +21,9 @@ def boundary_nodes(partition, alias: str = "boundary_nodes") -> Set: # It is used to get the value from the parent if there is # a parent, but it is NOT used when computing the result # for the first partition. Seems like a logic bug... + # + # I think it is used as the attribute name on the partition for the + # data stored by an updater that uses this routine... 
if partition.parent:
         return partition.parent[alias]
@@ -216,9 +219,6 @@ def interior_boundaries(
     :rtype: Dict
     """

-    # frm: TODO: NX vs. RX Issue - need to use edge_ids below to access edge information...
-    #            I think I have done this already below...
-
     added_perimeter = sum(
         # frm: Original Code: partition.graph.edges[edge]["shared_perim"] for edge in new_edges
         # frm: edges vs edge_ids: edge_ids are wanted here (integers)
diff --git a/gerrychain/updaters/election.py b/gerrychain/updaters/election.py
index 6a314cef..b29780c3 100644
--- a/gerrychain/updaters/election.py
+++ b/gerrychain/updaters/election.py
@@ -48,12 +48,12 @@ class Election:
     :type name: str
     :ivar parties: A list of the names of the parties in the election.
     :type parties: List[str]
-    :ivar columns: A list of the columns in the graph's node data that hold
+    :ivar node_attribute_names: A list of the node_attribute_names in the graph's node data that hold
         the vote totals for each party.
-    :type columns: List[str]
-    :ivar parties_to_columns: A dictionary mapping party names to the columns
+    :type node_attribute_names: List[str]
+    :ivar party_names_to_node_attribute_names: A dictionary mapping party names to the node_attribute_names
         in the graph's node data that hold the vote totals for that party.
-    :type parties_to_columns: Dict[str, str]
+    :type party_names_to_node_attribute_names: Dict[str, str]
     :ivar tallies: A dictionary mapping party names to :class:`DataTally` objects
         that manage the vote totals for that party.
     :type tallies: Dict[str, DataTally]
@@ -68,86 +68,103 @@ class Election:
     def __init__(
         self,
         name: str,
-        parties_to_columns: Union[Dict, List],
+        party_names_to_node_attribute_names: Union[Dict, List],
         alias: Optional[str] = None,
     ) -> None:
         """
         :param name: The name of the election. (e.g. "2008 Presidential")
         :type name: str
-        :param parties_to_columns: A dictionary matching party names to their
-            data columns, either as actual columns (list-like, indexed by nodes)
-            or as string keys for the node attributes that hold the party's
-            vote totals. Or, a list of strings which will serve as both
-            the party names and the node attribute keys.
-        :type parties_to_columns: Union[Dict, List]
+        :param party_names_to_node_attribute_names: A mapping from the name of a
+            party to the name of a node attribute that contains the
+            vote totals for that party. This parameter can be either a list or
+            a dict. If a list, then the name of the party and the name of the
+            node attribute are the same; for instance, ["Dem", "Rep"] would
+            indicate that the "Dem" party vote totals are stored in the "Dem"
+            node attribute. If a dict, then it matches party names to their
+            data, either as actual columns of vote totals (list-like, indexed
+            by nodes) or as string keys for the node attributes that hold the
+            party's vote totals.
+        :type party_names_to_node_attribute_names: Union[Dict, List]
         :param alias: Alias that the election is registered under in the
             Partition's dictionary of updaters.
:type alias: Optional[str], optional """ + self.name = name if alias is None: alias = name self.alias = alias - if isinstance(parties_to_columns, dict): - self.parties = list(parties_to_columns.keys()) - self.columns = list(parties_to_columns.values()) - self.parties_to_columns = parties_to_columns - elif isinstance(parties_to_columns, list): - self.parties = parties_to_columns - self.columns = parties_to_columns - self.parties_to_columns = dict(zip(self.parties, self.columns)) + # Canonicalize "parties", "node_attribute_names", and "party_names_to_node_attribute_names": + # + # "parties" are the names of the parties for purposes of reporting + # "node_attribute_names" are the names of the node attributes storing vote counts + # "party_names_to_node_attribute_names" is a mapping from one to the other + # + if isinstance(party_names_to_node_attribute_names, dict): + self.parties = list(party_names_to_node_attribute_names.keys()) + self.node_attribute_names = list(party_names_to_node_attribute_names.values()) + self.party_names_to_node_attribute_names = party_names_to_node_attribute_names + elif isinstance(party_names_to_node_attribute_names, list): + # name of the party and the attribute name containing value is the same + self.parties = party_names_to_node_attribute_names + self.node_attribute_names = party_names_to_node_attribute_names + self.party_names_to_node_attribute_names = dict(zip(self.parties, self.node_attribute_names)) else: - raise TypeError("Election expects parties_to_columns to be a dict or list") + raise TypeError("Election expects party_names_to_node_attribute_names to be a dict or list") + + # A DataTally can be created with a first parameter that is either a string + # or a dict. If a string, then the DataTally will interpret that string as + # the name of the node's attribute value that stores the data to be summed. + # However, if the first parameter is a node, then the DataTally will just + # access the value in the dict for a given node to get the data to be + # summed. The string approach makes it easy/convenient to sum data that + # is already stored as attribute values, while the dict approach makes + # it possible to sum computed values that are not stored as node attributes. + # + # However, after converting to using RustworkX for Graph objects in + # partitions, it no longer makes sense to use an explicit dict associating + # node_ids with data values for an election, because the node_ids given + # would need to be "original" node_ids derived from the NX-based graph + # that existed before creating the first partition, and those node_ids + # are useless once we convert the NX-based graph to RustworkX. + # + # So we disallow using a dict as a parameter to the DataTally below + # + + for party in self.parties: + if isinstance(self.party_names_to_node_attribute_names[party], dict): + raise Exception("Election: Using explicit node_id to vote totals maps is no longer permitted") self.tallies = { - party: DataTally(self.parties_to_columns[party], party) + party: DataTally(self.party_names_to_node_attribute_names[party], party) for party in self.parties } self.updater = ElectionUpdater(self) def _initialize_self(self, partition): - """ - Because node_ids are changed when converting from NX to RX based graphs when - we create a partition, we need to delay initialization of internal data members - that depend on node_ids until AFTER the partition has been created. 
That is - because we don't know how to map original node_ids to internal node_ids until - the partition is created. - - Note that the fact that node_ids have changed is hidden by the fact that - """ - - # frm: TODO: Clean this up... - # - # This is a mess - I am going to reset to the original code and make - # 100% sure I grok what is happening... - - """ - Convert _original_parties_to_columns to use internal_ids - - internal_parties_to_columns = ??? translate original node_ids... - ??? handle the case when instead of a dict it is a list - - Then just use the code from before, but with new node_ids - """ - # Compute totals for each "party" => dict of form: {part: sum} - # where "part" is a district in partition + # Create DataTally objects for each party in the election. self.tallies = { - party: DataTally(self.parties_to_columns[party], party) + # For each party, create a DataTally using the string for the node + # attribute where that party's vote totals can be found. + party: DataTally(self.party_names_to_node_attribute_names[party], party) for party in self.parties } def __str__(self): - return "Election '{}' with vote totals for parties {} from columns {}.".format( - self.name, str(self.parties), str(self.columns) + return "Election '{}' with vote totals for parties {} from node_attribute_names {}.".format( + self.name, str(self.parties), str(self.node_attribute_names) ) def __repr__(self): - return "Election(parties={}, columns={}, alias={})".format( - str(self.parties), str(self.columns), str(self.alias) + return "Election(parties={}, node_attribute_names={}, alias={})".format( + str(self.parties), str(self.node_attribute_names), str(self.alias) ) def __call__(self, *args, **kwargs): diff --git a/gerrychain/updaters/flows.py b/gerrychain/updaters/flows.py index 7f6706e5..fe48fb08 100644 --- a/gerrychain/updaters/flows.py +++ b/gerrychain/updaters/flows.py @@ -39,6 +39,13 @@ def flows_from_changes(old_partition, new_partition) -> Dict: `{'in': , 'out': }`. :rtype: Dict """ + + # frm: TODO: Grok why there is a test for: source != target + # + # It would seem to me that it would be a logic bug if there + # was a "flip" that did not in fact change the partition mapping... + # + flows = collections.defaultdict(create_flow) for node, target in new_partition.flips.items(): source = old_partition.assignment.mapping[node] diff --git a/gerrychain/updaters/locality_split_scores.py b/gerrychain/updaters/locality_split_scores.py index 130df528..dc56f746 100644 --- a/gerrychain/updaters/locality_split_scores.py +++ b/gerrychain/updaters/locality_split_scores.py @@ -160,8 +160,6 @@ def __call__(self, partition): # if self.localities == []: - # frm: TODO: NX vs. RX issues here. graph.nodes(data=) is NX specific... 
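# ---------------------------------------------------------------------
# Editor's note: on the flows_from_changes() TODO above about the
# source != target guard - one plausible reading (an assumption, not
# confirmed by this diff) is that a flips dict may record a no-op flip
# that assigns a node to the part it is already in. Without the guard,
# such a node would be counted as flowing both into and out of the same
# part. A toy illustration:
# ---------------------------------------------------------------------

import collections

def create_flow():
    return {"in": set(), "out": set()}

old_assignment = {1: "A", 2: "A", 3: "B"}
flips = {2: "B", 3: "B"}  # node 3 is a no-op flip: it is already in "B"

flows = collections.defaultdict(create_flow)
for node, target in flips.items():
    source = old_assignment[node]
    if source != target:  # without this, node 3 lands in both "in" and "out" of "B"
        flows[target]["in"].add(node)
        flows[source]["out"].add(node)

assert flows["B"]["in"] == {2}
assert flows["A"]["out"] == {2}
assert 3 not in flows["B"]["out"]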
- # frm: Original code: # self.localitydict = dict(partition.graph.nodes(data=self.col_id)) # diff --git a/gerrychain/updaters/tally.py b/gerrychain/updaters/tally.py index 9b9a6d5a..082bd675 100644 --- a/gerrychain/updaters/tally.py +++ b/gerrychain/updaters/tally.py @@ -46,24 +46,6 @@ def initialize_tally(partition): # frm: TODO: Verify that if the "data" passed in is not a string that it # is of the form: {node_id, data_value} - # frm: TODO: If self.data is a dict: {node: votes} then check if original node_ids - # - # This came up with Election udpaters - if you specify the data in an explicit - # dict of {node: votes}, then things get screwed up because at the time you create - # the Election object, the partition has not yet been created, so the node_ids are - # original node_ids which are not appropriate after the partition has been created - # and the new RX graph has new node_ids. - # - # In the Election updater case, the fix would be to delay the initial tally to - # happen AFTER the partition is created and to at some point before doing the - # initial tally, translate the original node_ids to be internal RX node_ids. - # - # However, I am wondering if this problem is a general problem with tallies - # made by other updaters. Stated differently, is it safe to assume that an - # explicit dict of {node_id: votes} is ALWAYS done with original node_ids in all - # cases of the use of tallies? - # - # => What other code uses Tally? if isinstance(self.data, str): @@ -75,6 +57,9 @@ def initialize_tally(partition): node_ids = partition.graph.node_indices attribute = self.data self.data = {node_id: graph.node_data(node_id)[attribute] for node_id in node_ids} + + # frm: TODO: Should probably check that the data for each node is numerical, since we + # are going to sum it below... tally = collections.defaultdict(int) for node_id, part in partition.assignment.items(): diff --git a/tests/constraints/test_contiguity.py b/tests/constraints/test_contiguity.py index e40d1e77..b5f7acde 100644 --- a/tests/constraints/test_contiguity.py +++ b/tests/constraints/test_contiguity.py @@ -23,10 +23,10 @@ def test_contiguous_components(graph): # to compare against the original node_ids, since RX node_ids change every time # you create a subgraph. - assert set(frozenset(g.original_node_ids_for_set(g.nodes)) for g in components[1]) == { + assert set(frozenset(g.original_nx_node_ids_for_set(g.nodes)) for g in components[1]) == { frozenset([0, 1, 2]), frozenset([6, 7, 8]), } - assert set(frozenset(g.original_node_ids_for_set(g.nodes)) for g in components[2]) == { + assert set(frozenset(g.original_nx_node_ids_for_set(g.nodes)) for g in components[2]) == { frozenset([3, 4, 5]), } diff --git a/tests/frm_tests/nx_rx_play.py b/tests/frm_tests/nx_rx_play.py deleted file mode 100644 index 283b99ea..00000000 --- a/tests/frm_tests/nx_rx_play.py +++ /dev/null @@ -1,126 +0,0 @@ -####################################################### -# frm: Overview of test_frm_nx_rx_graph.py -# -# This test exists to test how NX and RX differ. -# -# It will probably evolve into a way to test whether stuff -# works the same in the new Graph object with NX vs. RX -# under the covers... -# -# - -# frm TODO: Convert this into a pytest format... 
- -import matplotlib.pyplot as plt -from gerrychain import (Partition, Graph, MarkovChain, - updaters, constraints, accept) -from gerrychain.proposals import recom -from gerrychain.constraints import contiguous -from functools import partial -import pandas - -import os -import rustworkx as rx -import networkx as nx - -import pytest - - -# Set the random seed so that the results are reproducible! -import random -random.seed(2024) - -# Create NX and RX Graph objects with their underlying NX and RX graphs - -# Get path to the JSON containing graph data -test_file_path = os.path.abspath(__file__) -cur_directory = os.path.dirname(test_file_path) -path_for_json_file = os.path.join(cur_directory, "gerrymandria.json") -# print("json file is: ", json_file_path) - -# Create an NX based Graph object from the JSON -gerrychain_nx_graph = Graph.from_json(path_for_json_file) - -# Fetch the NX graph object from inside the Graph object -nx_graph = gerrychain_nx_graph.get_nx_graph() - -# Create an RX graph object from NX and set node type to be a dictionary to preserve data attributes -rx_graph = rx.networkx_converter(nx_graph, keep_attributes=True) - -# Create a Graph object with an RX graph inside -gerrychain_rx_graph = Graph.from_rustworkx(rx_graph) - -# frm: ???: TODO: The set(rx_graph.nodes()) fails because it returns dictionaries which Python does not like... -# nx_set_of_nodes = set(nx_graph.nodes()) -# print("len nx_set_of_nodes is: ", len(nx_set_of_nodes)) -# rx_set_of_nodes = set(rx_graph.nodes()) -# print("len rx_set_of_nodes is: ", len(rx_set_of_nodes)) -# print("NX nodes: ", nx_set_of_nodes) -# print("RX nodes: ", rx_set_of_nodes) - -print("Testing node data dict") -print("NX data dict for node 1: ", gerrychain_nx_graph.node_data(1)) -print("RX data dict for node 1: ", gerrychain_rx_graph.node_data(1)) - -""" -Stuff to figure out / test: - * graph data - that is, data on the graph itself. - * NX - graph = nx.Graph(day="Friday") - graph['day'] = "Monday" - * RX - graph = rx.PyGraph(attrs=dict(day="Friday")) - graph.attrs['day'] = "Monday" - * graph.nodes - * NX - This is a NodeView: - nodes[x] gives dict for data for the node - * RX - RX does not have this. Instead it has nodes() which just returns - a list/set of the node indices. - Actually, a node in RX can be any Python object, but in particular, it - can be a dictionary. In my test case, I think the node in the graph is just - an integer, but it should instead be a dict. Can the value of one node differ - from that of another node? That is, can you have a graph where the nodes are - of different types? This would semantically make no sense, but maybe it is - possible. - What is nice about this is that graph[node_id] in RX will be the data for the - node - in our case a dictionary. So the syntax to access a node's data dictionary - will be different, but both work: - - NX: graph.nodes[node_id] - RX: graph[node_id] - - * Comments: - The code had graph.nodes[node_id][value_id] = new_value all over. I changed - the code to instead do graph.node_data(node_id) but I think that - users are used to doing it the old way => need to create a NodeView for RX... - * graph.edges() - * NX - * RX - * graph.edges[edge]["random_weight"] = weight - * NX - * RX - * graph.node_indices - * NX - * RX - * graph.neighbors(node) - * NX - * RX - * graph.add_edge(node, node) - => next_node(node) local function needs to return int node ID - * NX - * RX - * graph.degree(node) - * NX - * RX - * iter(graph) - * NX - * RX - * graph._degrees[node] => what is _degrees? 
- * NX - * RX - * graph.edges - * NX - * RX -""" diff --git a/tests/frm_tests/test_frm_sanity.py b/tests/frm_tests/test_frm_sanity.py deleted file mode 100644 index aa3f72f3..00000000 --- a/tests/frm_tests/test_frm_sanity.py +++ /dev/null @@ -1,3 +0,0 @@ - -def test_doit(): - assert 2 + 2 == 4 diff --git a/tests/frm_tests/test_to_networkx_graph.py b/tests/frm_tests/test_to_networkx_graph.py new file mode 100644 index 00000000..54f85ab3 --- /dev/null +++ b/tests/frm_tests/test_to_networkx_graph.py @@ -0,0 +1,195 @@ +# +# This tests whether the routine, to_networkx_graph(), works +# properly. +# +# This routine extracts a new NetworkX.Graph object from an +# Graph object that is based on RustworkX. When we create +# a Partition object from an NetworkX Graph we convert the +# graph to RustworkX for performance. However, users might +# want to have access to a NetworkX Graph for a variety of +# reasons: mostly because they built their initial graph as +# a NetworkX Graph and they used node_ids that made sense to +# them at the time and would like to access the graph at +# the end of a MarkovChain run using those same "original" +# IDs. +# +# The extracted NetworkX Graph should have the "original" +# node_ids, and it should have all of the node and edge +# data that was in the RustworkX Graph object. +# + + +import networkx as nx + +from gerrychain.graph import Graph +from gerrychain.partition import Partition + +def test_to_networkx_graph_works(): + + """ + Create an NX graph (grid) that looks like this: + + 'A' 'B' 'C' + 'D' 'E' 'F' + 'G' 'H' 'I' + """ + + nx_graph = nx.Graph() + nx_graph.add_edges_from( + [ + ('A', 'B'), + ('A', 'D'), + ('B', 'C'), + ('B', 'E'), + ('C', 'F'), + ('D', 'E'), + ('D', 'G'), + ('E', 'F'), + ('E', 'H'), + ('F', 'I'), + ('G', 'H'), + ('H', 'I'), + ] + ) + + # Add some node and edge data to the nx_graph + + graph_node_ids = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'] + for node_id in graph_node_ids: + nx_graph.nodes[node_id]["nx-node-data"] = node_id + + nx_graph.edges[('A','B')]["nx-edge-data"] = ('A','B') + nx_graph.edges[('A','D')]["nx-edge-data"] = ('A','D') + nx_graph.edges[('B','C')]["nx-edge-data"] = ('B','C') + nx_graph.edges[('B','E')]["nx-edge-data"] = ('B','E') + nx_graph.edges[('C','F')]["nx-edge-data"] = ('C','F') + nx_graph.edges[('D','E')]["nx-edge-data"] = ('D','E') + nx_graph.edges[('D','G')]["nx-edge-data"] = ('D','G') + nx_graph.edges[('E','F')]["nx-edge-data"] = ('E','F') + nx_graph.edges[('E','H')]["nx-edge-data"] = ('E','H') + nx_graph.edges[('F','I')]["nx-edge-data"] = ('F','I') + nx_graph.edges[('G','H')]["nx-edge-data"] = ('G','H') + nx_graph.edges[('H','I')]["nx-edge-data"] = ('H','I') + + graph = Graph.from_networkx(nx_graph) + + """ + Create a partition assigning each "row" of + nodes to a part (district), so the assignment + looks like: + + 0 0 0 + 1 1 1 + 2 2 2 + """ + + initial_assignment = { + 'A': 0, + 'B': 0, + 'C': 0, + 'D': 1, + 'E': 1, + 'F': 1, + 'G': 2, + 'H': 2, + 'I': 2, + } + + # Create a partition + partition = Partition(graph, initial_assignment) + + # The partition's graph object has been converted to be based on RX + new_graph = partition.graph + + # Add some additional data + for node_id in new_graph.node_indices: + new_graph.node_data(node_id)["internal-node-data"] = new_graph.original_nx_node_id_for_internal_node_id(node_id) + for edge_id in new_graph.edge_indices: + new_graph.edge_data(edge_id)["internal-edge-data"] = "internal-edge-data" + + # Now create a second partition by flipping the + # nodes in the first 
row to be in part (district) 1 + + """ + The new partition's mapping of nodes to parts should look like this: + + 1 1 1 + 1 1 1 + 2 2 2 + """ + + flips = {'A': 1, 'B': 1, 'C': 1} + # Create a new partition based on these flips - using "original" node_ids + new_partition = partition.flip(flips, use_original_nx_node_ids=True) + + + # Get the NX graph after doing the flips. + extracted_nx_graph = new_partition.graph.to_networkx_graph() + + # Get the assignments for both the initial partition and the new_partition + + internal_assignment_0 = partition.assignment + internal_assignment_1 = new_partition.assignment + + # convert the internal assignments into "original" node_ids + original_assignment_0 = {} + for node_id, part in internal_assignment_0.items(): + original_nx_node_id = partition.graph.original_nx_node_id_for_internal_node_id(node_id) + original_assignment_0[original_nx_node_id] = part + original_assignment_1 = {} + for node_id, part in internal_assignment_1.items(): + original_nx_node_id = partition.graph.original_nx_node_id_for_internal_node_id(node_id) + original_assignment_1[original_nx_node_id] = part + + # Check that all is well... + + # Check that the initial assignment is the same as the internal RX-based assignment + for node_id, part in initial_assignment.items(): + assert (part == original_assignment_0[node_id]) + + # Check that the flips did what they were supposed to do + for node_id in ['A', 'B', 'C', 'D', 'E', 'F']: + assert(original_assignment_1[node_id] == 1) + for node_id in ['G', 'H', 'I']: + assert(original_assignment_1[node_id] == 2) + + # Check that the node and edge data is present + + # Check node data + for node_id in extracted_nx_graph.nodes: + # Data assigned to the NX-Graph should still be there... + assert( + extracted_nx_graph.nodes[node_id]["nx-node-data"] + == + nx_graph.nodes[node_id]["nx-node-data"] + ) + # Data assigned to the partition's RX-Graph should still be there... + assert( + extracted_nx_graph.nodes[node_id]["internal-node-data"] + == + node_id + ) + # Node_id agrees with __networkx_node__ (created by RX conversion) + assert( + node_id + == + extracted_nx_graph.nodes[node_id]["__networkx_node__"] + ) + + + # Check edge data + for edge in extracted_nx_graph.edges: + assert( + extracted_nx_graph.edges[edge]["nx-edge-data"] + == + nx_graph.edges[edge]["nx-edge-data"] + ) + # Data assigned to the partition's RX-Graph should still be there... 
+ assert( + extracted_nx_graph.edges[edge]["internal-edge-data"] + == + "internal-edge-data" + ) + # compare the extracted_nx_graph's nodes and edges to see if they make sense + # Compare node_data and edge_data + diff --git a/tests/partition/test_partition.py b/tests/partition/test_partition.py index b35a95f9..77381254 100644 --- a/tests/partition/test_partition.py +++ b/tests/partition/test_partition.py @@ -79,7 +79,7 @@ def test_geographic_partition_can_be_instantiated(example_geographic_partition): def test_Partition_parts_is_a_dictionary_of_parts_to_nodes(example_partition): partition = example_partition flip = {1: 2} - new_partition = partition.flip(flip, use_original_node_ids=True) + new_partition = partition.flip(flip, use_original_nx_node_ids=True) assert all(isinstance(nodes, frozenset) for nodes in new_partition.parts.values()) assert all(isinstance(nodes, frozenset) for nodes in partition.parts.values()) @@ -98,12 +98,12 @@ def test_Partition_has_subgraphs(example_partition): partition = example_partition subgraph_for_part_1 = partition.subgraphs[1] - internal_node_id_0 = subgraph_for_part_1.internal_node_id_for_original_node_id(0) - internal_node_id_1 = subgraph_for_part_1.internal_node_id_for_original_node_id(1) + internal_node_id_0 = subgraph_for_part_1.internal_node_id_for_original_nx_node_id(0) + internal_node_id_1 = subgraph_for_part_1.internal_node_id_for_original_nx_node_id(1) assert set(partition.subgraphs[1].nodes) == {internal_node_id_0, internal_node_id_1} subgraph_for_part_2 = partition.subgraphs[2] - internal_node_id = subgraph_for_part_2.internal_node_id_for_original_node_id(2) + internal_node_id = subgraph_for_part_2.internal_node_id_for_original_nx_node_id(2) assert set(partition.subgraphs[2].nodes) == {internal_node_id} assert len(list(partition.subgraphs)) == 2 @@ -129,8 +129,8 @@ def test_can_be_created_from_a_districtr_file(graph, districtr_plan_file): internal_node_assignment = partition.assignment.to_dict() original_node_assignment = {} for internal_node_id, part in internal_node_assignment.items(): - original_node_id = partition.graph.original_node_id_for_internal_node_id(internal_node_id) - original_node_assignment[original_node_id] = part + original_nx_node_id = partition.graph.original_nx_node_id_for_internal_node_id(internal_node_id) + original_node_assignment[original_nx_node_id] = part assert original_node_assignment == { 0: 1, diff --git a/tests/test_chain.py b/tests/test_chain.py index 8f4902b3..1ffd7554 100644 --- a/tests/test_chain.py +++ b/tests/test_chain.py @@ -4,12 +4,12 @@ class MockState: - def flip(self, changes, use_original_node_ids): + def flip(self, changes, use_original_nx_node_ids): return MockState() def mock_proposal(state): - return state.flip({1: 2}, use_original_node_ids=True) + return state.flip({1: 2}, use_original_nx_node_ids=True) def mock_accept(state): diff --git a/tests/test_make_graph.py b/tests/test_make_graph.py index 1fa1d96c..31bfb98b 100644 --- a/tests/test_make_graph.py +++ b/tests/test_make_graph.py @@ -79,7 +79,8 @@ def test_add_data_to_graph_can_handle_column_names_that_start_with_numbers(): df = pandas.DataFrame({"16SenDVote": [20, 30, 50], "node": ["01", "02", "03"]}) df = df.set_index("node") - # frm: Note that the new Graph does not support add_data() + # frm: Note that the new Graph only supports the add_data() routine if + # the underlying graph object is an NX Graph graph = Graph.from_networkx(nx_graph) @@ -203,8 +204,15 @@ def test_computes_boundary_perims(geodataframe_with_boundary): def 
edge_set_equal(set1, set2): - return {(y, x) for x, y in set1} | set1 == {(y, x) for x, y in set2} | set2 - + # Returns true if the two sets have the same edges. The complication is that + # for an edge, (1,2) is the same as (2,1), so to compare them you need to + # canonicalize the edges somehow. This code just takes set1 and set2 and + # creates a new set for each that has both edge pairs for each edge, and + # it then compares those new sets. + # + canonical_set1 = {(y, x) for x, y in set1} | set1 + canonical_set2 = {(y, x) for x, y in set2} | set2 + return canonical_set1 == canonical_set2 def test_from_file_adds_all_data_by_default(shapefile): graph = Graph.from_file(shapefile) diff --git a/tests/test_metagraph.py b/tests/test_metagraph.py index 4b5a587a..32c58617 100644 --- a/tests/test_metagraph.py +++ b/tests/test_metagraph.py @@ -13,6 +13,16 @@ def partition(graph): def test_all_cut_edge_flips(partition): + # frm: TODO: Maybe change all_cut_edge_flips to return a dict + # + # At present, it returns an iterator, which makes the code below + # more complicated than it needs to be. If it just returned + # a dict, then the code would be: + # + # result = set( + # node, part for all_cut_edge_flips(partition).items() + # ) + # result = set( (node, part) for flip in all_cut_edge_flips(partition) @@ -22,10 +32,9 @@ def test_all_cut_edge_flips(partition): # Convert from internal node_ids to "original" node_ids new_result = set() for internal_node_id, part in result: - original_node_id = partition.graph.original_node_id_for_internal_node_id(internal_node_id) - new_result.add((original_node_id, part)) + original_nx_node_id = partition.graph.original_nx_node_id_for_internal_node_id(internal_node_id) + new_result.add((original_nx_node_id, part)) - # frm: TODO: stmt below fails - the "result" has (2,2) instead of (3,2) assert new_result == {(6, 1), (7, 1), (8, 1), (4, 2), (5, 2), (3, 2)} @@ -53,6 +62,11 @@ def disallow_six_to_one(partition): constraints = [disallow_six_to_one] + # frm: TODO: If I created a utility routine to convert + # a list of flips to original node_ids, + # then I could use that here and then + # convert the resulting list to a set... + result = set( (node, part) for flip in all_valid_flips(partition, constraints) @@ -62,8 +76,7 @@ def disallow_six_to_one(partition): # Convert from internal node_ids to "original" node_ids new_result = set() for internal_node_id, part in result: - original_node_id = partition.graph.original_node_id_for_internal_node_id(internal_node_id) - new_result.add((original_node_id, part)) + original_nx_node_id = partition.graph.original_nx_node_id_for_internal_node_id(internal_node_id) + new_result.add((original_nx_node_id, part)) - # frm: TODO: stmt below fails - the "result" has (2,2) instead of (3,2) assert new_result == {(7, 1), (8, 1), (4, 2), (5, 2), (3, 2)} diff --git a/tests/test_tree.py b/tests/test_tree.py index d2458205..7635f52f 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -1,6 +1,7 @@ import functools import networkx +import rustworkx import pytest from gerrychain import MarkovChain @@ -26,70 +27,147 @@ random.seed(2018) +# +# This code is complicated by the need to test both NX-based +# and RX-based Graph objects. +# +# The pattern is to define the test logic in a routine that +# will be run with both NX-based and RX-based Graph objects +# and to then have the actual test case call that logic. 
+# This buries the asserts down a level, which means that +# figuring out what went wrong if a test fails will be +# slightly more challenging, but it keeps the logic for +# testing both NX-based and RX-based Graph objects clean. +# @pytest.fixture -def graph_with_pop(three_by_three_grid): +def graph_with_pop_nx(three_by_three_grid): + # NX-based Graph object for node in three_by_three_grid: three_by_three_grid.node_data(node)["pop"] = 1 return three_by_three_grid +@pytest.fixture +def graph_with_pop_rx(graph_with_pop_nx): + # RX-based Graph object (same data as NX-based version) + graph_rx = graph_with_pop_nx.convert_from_nx_to_rx() + return graph_rx @pytest.fixture -def partition_with_pop(graph_with_pop): +def partition_with_pop(graph_with_pop_nx): + # No need for an RX-based Graph here because creating the + # Partition object converts the graph to be RX-based if + # it is not already RX-based + # return Partition( - graph_with_pop, + graph_with_pop_nx, {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 1, 7: 1, 8: 1}, updaters={"pop": Tally("pop"), "cut_edges": cut_edges}, ) - @pytest.fixture -def twelve_by_twelve_with_pop(): +def twelve_by_twelve_with_pop_nx(): + # NX-based Graph object + xy_grid = networkx.grid_graph([12, 12]) + + # Relabel nodes with integers rather than tuples. Node + # in cartesian coordinate (x,y) will be relabeled with + # the integer = x*12 + y , which just numbers nodes + # sequentially from 0 by row... + # nodes = {node: node[1] + 12 * node[0] for node in xy_grid} grid = networkx.relabel_nodes(xy_grid, nodes) + for node in grid: grid.nodes[node]["pop"] = 1 return Graph.from_networkx(grid) +@pytest.fixture +def twelve_by_twelve_with_pop_rx(twelve_by_twelve_with_pop_nx): + # RX-based Graph object (same data as NX-based version) + graph_rx = twelve_by_twelve_with_pop_nx.convert_from_nx_to_rx() + return graph_rx -def test_bipartition_tree_returns_a_subset_of_nodes(graph_with_pop): - ideal_pop = sum(graph_with_pop.node_data(node)["pop"] for node in graph_with_pop) / 2 - result = bipartition_tree(graph_with_pop, "pop", ideal_pop, 0.25, 10) - # frm: TODO: Next stmt fails - the result is not a frozenset... 
+# --------------------------------------------------------------------- + +def do_test_bipartition_tree_random_returns_a_subset_of_nodes(graph): + ideal_pop = sum(graph.node_data(node)["pop"] for node in graph) / 2 + result = bipartition_tree_random(graph, "pop", ideal_pop, 0.25, 10) assert isinstance(result, frozenset) - assert all(node in graph_with_pop.nodes for node in result) + assert all(node in graph.nodes for node in result) +def test_bipartition_tree_random_returns_a_subset_of_nodes(graph_with_pop_nx, graph_with_pop_rx): + # Test both NX-based and RX-based Graph objects + do_test_bipartition_tree_random_returns_a_subset_of_nodes(graph_with_pop_nx) + do_test_bipartition_tree_random_returns_a_subset_of_nodes(graph_with_pop_rx) -def test_bipartition_tree_returns_within_epsilon_of_target_pop(graph_with_pop): - ideal_pop = sum(graph_with_pop.node_data(node)["pop"] for node in graph_with_pop) / 2 +# --------------------------------------------------------------------- + +def do_test_bipartition_tree_random_returns_within_epsilon_of_target_pop(graph): + ideal_pop = sum(graph.node_data(node)["pop"] for node in graph) / 2 epsilon = 0.25 - result = bipartition_tree(graph_with_pop, "pop", ideal_pop, epsilon, 10) + result = bipartition_tree_random(graph, "pop", ideal_pop, epsilon, 10) - part_pop = sum(graph_with_pop.node_data(node)["pop"] for node in result) + part_pop = sum(graph.node_data(node)["pop"] for node in result) assert abs(part_pop - ideal_pop) / ideal_pop < epsilon - -def test_recursive_tree_part_returns_within_epsilon_of_target_pop( - twelve_by_twelve_with_pop, +def test_bipartition_tree_random_returns_within_epsilon_of_target_pop( + graph_with_pop_nx, + graph_with_pop_rx ): + # Test both NX-based and RX-based Graph objects + do_test_bipartition_tree_random_returns_within_epsilon_of_target_pop(graph_with_pop_nx) + do_test_bipartition_tree_random_returns_within_epsilon_of_target_pop(graph_with_pop_rx) + +# --------------------------------------------------------------------- + +def do_test_bipartition_tree_returns_a_subset_of_nodes(graph): + ideal_pop = sum(graph.node_data(node)["pop"] for node in graph) / 2 + result = bipartition_tree(graph, "pop", ideal_pop, 0.25, 10) + assert isinstance(result, frozenset) + assert all(node in graph.nodes for node in result) + +def test_bipartition_tree_returns_a_subset_of_nodes(graph_with_pop_nx, graph_with_pop_rx): + # Test both NX-based and RX-based Graph objects + do_test_bipartition_tree_returns_a_subset_of_nodes(graph_with_pop_nx) + do_test_bipartition_tree_returns_a_subset_of_nodes(graph_with_pop_rx) + +# --------------------------------------------------------------------- + +def do_test_bipartition_tree_returns_within_epsilon_of_target_pop(graph): + ideal_pop = sum(graph.node_data(node)["pop"] for node in graph) / 2 + epsilon = 0.25 + result = bipartition_tree(graph, "pop", ideal_pop, epsilon, 10) + + part_pop = sum(graph.node_data(node)["pop"] for node in result) + assert abs(part_pop - ideal_pop) / ideal_pop < epsilon + +def test_bipartition_tree_returns_within_epsilon_of_target_pop(graph_with_pop_nx, graph_with_pop_rx): + # Test both NX-based and RX-based Graph objects + do_test_bipartition_tree_returns_within_epsilon_of_target_pop(graph_with_pop_nx) + do_test_bipartition_tree_returns_within_epsilon_of_target_pop(graph_with_pop_rx) + +# --------------------------------------------------------------------- + +def do_test_recursive_tree_part_returns_within_epsilon_of_target_pop(twelve_by_twelve_with_pop_graph): n_districts = 7 # 144/7 
≈ 20.5 nodes/subgraph (1 person/node) ideal_pop = ( sum( - twelve_by_twelve_with_pop.node_data(node)["pop"] - for node in twelve_by_twelve_with_pop + twelve_by_twelve_with_pop_graph.node_data(node)["pop"] + for node in twelve_by_twelve_with_pop_graph ) ) / n_districts epsilon = 0.05 result = recursive_tree_part( - twelve_by_twelve_with_pop, + twelve_by_twelve_with_pop_graph, range(n_districts), ideal_pop, "pop", epsilon, ) partition = Partition( - twelve_by_twelve_with_pop, result, updaters={"pop": Tally("pop")} + twelve_by_twelve_with_pop_graph, result, updaters={"pop": Tally("pop")} ) # frm: Original Code: # @@ -102,20 +180,29 @@ def test_recursive_tree_part_returns_within_epsilon_of_target_pop( for part_pop in partition["pop"].values() ) +def test_recursive_tree_part_returns_within_epsilon_of_target_pop( + twelve_by_twelve_with_pop_nx, + twelve_by_twelve_with_pop_rx +): + # Test both NX-based and RX-based Graph objects + do_test_recursive_tree_part_returns_within_epsilon_of_target_pop(twelve_by_twelve_with_pop_nx) + do_test_recursive_tree_part_returns_within_epsilon_of_target_pop(twelve_by_twelve_with_pop_rx) + +# --------------------------------------------------------------------- -def test_recursive_tree_part_returns_within_epsilon_of_target_pop_using_contraction( - twelve_by_twelve_with_pop, +def do_test_recursive_tree_part_returns_within_epsilon_of_target_pop_using_contraction( + twelve_by_twelve_with_pop_graph, ): n_districts = 7 # 144/7 ≈ 20.5 nodes/subgraph (1 person/node) ideal_pop = ( sum( - twelve_by_twelve_with_pop.node_data(node)["pop"] - for node in twelve_by_twelve_with_pop + twelve_by_twelve_with_pop_graph.node_data(node)["pop"] + for node in twelve_by_twelve_with_pop_graph ) ) / n_districts epsilon = 0.05 result = recursive_tree_part( - twelve_by_twelve_with_pop, + twelve_by_twelve_with_pop_graph, range(n_districts), ideal_pop, "pop", @@ -127,7 +214,7 @@ def test_recursive_tree_part_returns_within_epsilon_of_target_pop_using_contract ), ) partition = Partition( - twelve_by_twelve_with_pop, result, updaters={"pop": Tally("pop")} + twelve_by_twelve_with_pop_graph, result, updaters={"pop": Tally("pop")} ) # frm: Original Code: # @@ -141,20 +228,33 @@ def test_recursive_tree_part_returns_within_epsilon_of_target_pop_using_contract for part_pop in partition["pop"].values() ) +def test_recursive_tree_part_returns_within_epsilon_of_target_pop_using_contraction( + twelve_by_twelve_with_pop_nx, + twelve_by_twelve_with_pop_rx +): + # Test both NX-based and RX-based Graph objects + do_test_recursive_tree_part_returns_within_epsilon_of_target_pop_using_contraction( + twelve_by_twelve_with_pop_nx + ) + do_test_recursive_tree_part_returns_within_epsilon_of_target_pop_using_contraction( + twelve_by_twelve_with_pop_rx + ) -def test_recursive_seed_part_returns_within_epsilon_of_target_pop( - twelve_by_twelve_with_pop, +# --------------------------------------------------------------------- + +def do_test_recursive_seed_part_returns_within_epsilon_of_target_pop( + twelve_by_twelve_with_pop_graph, ): n_districts = 7 # 144/7 ≈ 20.5 nodes/subgraph (1 person/node) ideal_pop = ( sum( - twelve_by_twelve_with_pop.node_data(node)["pop"] - for node in twelve_by_twelve_with_pop + twelve_by_twelve_with_pop_graph.node_data(node)["pop"] + for node in twelve_by_twelve_with_pop_graph ) ) / n_districts epsilon = 0.1 result = recursive_seed_part( - twelve_by_twelve_with_pop, + twelve_by_twelve_with_pop_graph, range(n_districts), ideal_pop, "pop", @@ -163,7 +263,7 @@ def 
test_recursive_seed_part_returns_within_epsilon_of_target_pop( ceil=None, ) partition = Partition( - twelve_by_twelve_with_pop, result, updaters={"pop": Tally("pop")} + twelve_by_twelve_with_pop_graph, result, updaters={"pop": Tally("pop")} ) # frm: Original Code: # @@ -176,20 +276,29 @@ def test_recursive_seed_part_returns_within_epsilon_of_target_pop( for part_pop in partition["pop"].values() ) +def test_recursive_seed_part_returns_within_epsilon_of_target_pop( + twelve_by_twelve_with_pop_nx, + twelve_by_twelve_with_pop_rx +): + # Test both NX-based and RX-based Graph objects + do_test_recursive_seed_part_returns_within_epsilon_of_target_pop(twelve_by_twelve_with_pop_nx) + do_test_recursive_seed_part_returns_within_epsilon_of_target_pop(twelve_by_twelve_with_pop_rx) -def test_recursive_seed_part_returns_within_epsilon_of_target_pop_using_contraction( - twelve_by_twelve_with_pop, +# --------------------------------------------------------------------- + +def do_test_recursive_seed_part_returns_within_epsilon_of_target_pop_using_contraction( + twelve_by_twelve_with_pop_graph, ): n_districts = 7 # 144/7 ≈ 20.5 nodes/subgraph (1 person/node) ideal_pop = ( sum( - twelve_by_twelve_with_pop.node_data(node)["pop"] - for node in twelve_by_twelve_with_pop + twelve_by_twelve_with_pop_graph.node_data(node)["pop"] + for node in twelve_by_twelve_with_pop_graph ) ) / n_districts epsilon = 0.1 result = recursive_seed_part( - twelve_by_twelve_with_pop, + twelve_by_twelve_with_pop_graph, range(n_districts), ideal_pop, "pop", @@ -203,7 +312,7 @@ def test_recursive_seed_part_returns_within_epsilon_of_target_pop_using_contract ), ) partition = Partition( - twelve_by_twelve_with_pop, result, updaters={"pop": Tally("pop")} + twelve_by_twelve_with_pop_graph, result, updaters={"pop": Tally("pop")} ) # frm: Original Code: # @@ -216,8 +325,21 @@ def test_recursive_seed_part_returns_within_epsilon_of_target_pop_using_contract for part_pop in partition["pop"].values() ) +def test_recursive_seed_part_returns_within_epsilon_of_target_pop_using_contraction( + twelve_by_twelve_with_pop_nx, + twelve_by_twelve_with_pop_rx +): + # Test both NX-based and RX-based Graph objects + do_test_recursive_seed_part_returns_within_epsilon_of_target_pop_using_contraction( + twelve_by_twelve_with_pop_nx + ) + do_test_recursive_seed_part_returns_within_epsilon_of_target_pop_using_contraction( + twelve_by_twelve_with_pop_rx + ) -def test_recursive_seed_part_uses_method(twelve_by_twelve_with_pop): +# --------------------------------------------------------------------- + +def do_test_recursive_seed_part_uses_method(twelve_by_twelve_with_pop_graph): calls = 0 def dummy_method(graph, pop_col, pop_target, epsilon, node_repeats, one_sided_cut): @@ -236,13 +358,13 @@ def dummy_method(graph, pop_col, pop_target, epsilon, node_repeats, one_sided_cu n_districts = 7 # 144/7 ≈ 20.5 nodes/subgraph (1 person/node) ideal_pop = ( sum( - twelve_by_twelve_with_pop.node_data(node)["pop"] - for node in twelve_by_twelve_with_pop + twelve_by_twelve_with_pop_graph.node_data(node)["pop"] + for node in twelve_by_twelve_with_pop_graph ) ) / n_districts epsilon = 0.1 result = recursive_seed_part( - twelve_by_twelve_with_pop, + twelve_by_twelve_with_pop_graph, range(n_districts), ideal_pop, "pop", @@ -257,20 +379,26 @@ def dummy_method(graph, pop_col, pop_target, epsilon, node_repeats, one_sided_cu # implementation detail) assert calls >= n_districts - 1 +def test_recursive_seed_part_uses_method(twelve_by_twelve_with_pop_nx, twelve_by_twelve_with_pop_rx): + # Test 
both NX-based and RX-based Graph objects + do_test_recursive_seed_part_uses_method(twelve_by_twelve_with_pop_nx) + do_test_recursive_seed_part_uses_method(twelve_by_twelve_with_pop_rx) -def test_recursive_seed_part_with_n_unspecified_within_epsilon( - twelve_by_twelve_with_pop, +# --------------------------------------------------------------------- + +def do_test_recursive_seed_part_with_n_unspecified_within_epsilon( + twelve_by_twelve_with_pop_graph, ): n_districts = 6 # This should set n=3 ideal_pop = ( sum( - twelve_by_twelve_with_pop.node_data(node)["pop"] - for node in twelve_by_twelve_with_pop + twelve_by_twelve_with_pop_graph.node_data(node)["pop"] + for node in twelve_by_twelve_with_pop_graph ) ) / n_districts epsilon = 0.05 result = recursive_seed_part( - twelve_by_twelve_with_pop, + twelve_by_twelve_with_pop_graph, range(n_districts), ideal_pop, "pop", @@ -278,7 +406,7 @@ def test_recursive_seed_part_with_n_unspecified_within_epsilon( ceil=None, ) partition = Partition( - twelve_by_twelve_with_pop, result, updaters={"pop": Tally("pop")} + twelve_by_twelve_with_pop_graph, result, updaters={"pop": Tally("pop")} ) # frm: Original Code: # @@ -291,35 +419,114 @@ def test_recursive_seed_part_with_n_unspecified_within_epsilon( for part_pop in partition["pop"].values() ) +def test_recursive_seed_part_with_n_unspecified_within_epsilon( + twelve_by_twelve_with_pop_nx, + twelve_by_twelve_with_pop_rx +): + # Test both NX-based and RX-based Graph objects + do_test_recursive_seed_part_with_n_unspecified_within_epsilon(twelve_by_twelve_with_pop_nx) + do_test_recursive_seed_part_with_n_unspecified_within_epsilon(twelve_by_twelve_with_pop_rx) + +# --------------------------------------------------------------------- -def test_random_spanning_tree_returns_tree_with_pop_attribute(graph_with_pop): - tree = random_spanning_tree(graph_with_pop) - assert networkx.is_tree(tree) +def do_test_random_spanning_tree_returns_tree_with_pop_attribute(graph): + tree = random_spanning_tree(graph) + # frm: Original code: assert networkx.is_tree(tree) + assert tree.is_a_tree() +def test_random_spanning_tree_returns_tree_with_pop_attribute(graph_with_pop_nx, graph_with_pop_rx): + # Test both NX-based and RX-based Graph objects + do_test_random_spanning_tree_returns_tree_with_pop_attribute(graph_with_pop_nx) + do_test_random_spanning_tree_returns_tree_with_pop_attribute(graph_with_pop_rx) -def test_uniform_spanning_tree_returns_tree_with_pop_attribute(graph_with_pop): - tree = uniform_spanning_tree(graph_with_pop) - # frm: TODO: Get rid of networkx dependency - assert networkx.is_tree(tree) +# --------------------------------------------------------------------- +def do_test_uniform_spanning_tree_returns_tree_with_pop_attribute(graph): + tree = uniform_spanning_tree(graph) + # frm: Original code: assert networkx.is_tree(tree) + assert tree.is_a_tree() -def test_bipartition_tree_returns_a_tree(graph_with_pop): - ideal_pop = sum(graph_with_pop.node_data(node)["pop"] for node in graph_with_pop) / 2 - tree = Graph.from_networkx( - networkx.Graph([(0, 1), (1, 2), (1, 4), (3, 4), (4, 5), (3, 6), (6, 7), (6, 8)]) - ) - for node in tree: - tree.node_data(node)["pop"] = graph_with_pop.node_data(node)["pop"] +def test_uniform_spanning_tree_returns_tree_with_pop_attribute(graph_with_pop_nx, graph_with_pop_rx): + # Test both NX-based and RX-based Graph objects + do_test_uniform_spanning_tree_returns_tree_with_pop_attribute(graph_with_pop_nx) + do_test_uniform_spanning_tree_returns_tree_with_pop_attribute(graph_with_pop_rx) + +# 
---------------------------------------------------------------------
+
+def do_test_bipartition_tree_returns_a_tree(graph, spanning_tree):
+    ideal_pop = sum(graph.node_data(node)["pop"] for node in graph) / 2
     result = bipartition_tree(
-        graph_with_pop, "pop", ideal_pop, 0.25, 10, tree, lambda x: 4
+        graph, "pop", ideal_pop, 0.25, 10, spanning_tree, lambda x: 4
     )
-    assert networkx.is_tree(tree.subgraph(result))
-    assert networkx.is_tree(
-        tree.subgraph({node for node in tree if node not in result})
-    )
+    # frm: Original code:
+    #    assert networkx.is_tree(spanning_tree.subgraph(result))
+    #    assert networkx.is_tree(
+    #        spanning_tree.subgraph({node for node in tree if node not in result})
+    #    )
+    assert spanning_tree.subgraph(result).is_a_tree()
+    assert spanning_tree.subgraph({node for node in spanning_tree if node not in result}).is_a_tree()
+
+def create_graphs_from_nx_edges(list_of_edges_nx, nx_to_rx_node_id_map):
+
+    # NX is easy - just use the list of NX edges
+    graph_nx = Graph.from_networkx(networkx.Graph(list_of_edges_nx))
+
+    print(f"create_graphs_from_nx_edges: list_of_edges_nx is: {list_of_edges_nx}")
+    print(f"create_graphs_from_nx_edges: nx_to_rx_node_id_map: {nx_to_rx_node_id_map}")
+
+    # RX requires more work. First we have to translate the node_ids used in the
+    # list of edges to be the ones used in the RX graph using the
+    # nx_to_rx_node_id_map. Then we need to create a rustworkx.PyGraph and then
+    # from that create a "new" Graph object.
+
+    rx_graph = rustworkx.PyGraph()
+
+    # Translate the NX edges into the appropriate node_ids for the derived RX graph.
+    # Each edge gets its own (fresh) empty data dict so that edge data can be
+    # attached to it later; sharing a single dict across edges would make every
+    # edge alias the same data.
+    list_of_edges_rx = [
+        (
+            nx_to_rx_node_id_map[edge[0]],
+            nx_to_rx_node_id_map[edge[1]],
+            {},
+        )
+        for edge in list_of_edges_nx
+    ]
+
+    # Create the RX nodes - one for each entry in the node_id map
+    num_nodes = len(nx_to_rx_node_id_map)
+    for i in range(num_nodes):
+        empty_node_data_dict = {}  # needed so we can attach node data to each node
+        rx_graph.add_node(empty_node_data_dict)
+    # Verify that the nodes created have node_ids 0 .. num_nodes - 1
+    assert set(rx_graph.node_indices()) == set(range(num_nodes))
+
+    # Add the RX edges
+    rx_graph.add_edges_from(list_of_edges_rx)
+    graph_rx = Graph.from_rustworkx(rx_graph)
+
+    return graph_nx, graph_rx
+
+def test_bipartition_tree_returns_a_tree(graph_with_pop_nx, graph_with_pop_rx):
+    # Test both NX-based and RX-based Graph objects
+
+    spanning_tree_edges_nx = [(0, 1), (1, 2), (1, 4), (3, 4), (4, 5), (3, 6), (6, 7), (6, 8)]
+
+    spanning_tree_nx, spanning_tree_rx = \
+        create_graphs_from_nx_edges(
+            spanning_tree_edges_nx,
+            graph_with_pop_rx.nx_to_rx_node_id_map
+        )
+
+    # Give the nodes a population
+    for node in spanning_tree_nx:
+        spanning_tree_nx.node_data(node)["pop"] = 1
+    for node in spanning_tree_rx:
+        spanning_tree_rx.node_data(node)["pop"] = 1
+
+    do_test_bipartition_tree_returns_a_tree(graph_with_pop_nx, spanning_tree_nx)
+    do_test_bipartition_tree_returns_a_tree(graph_with_pop_rx, spanning_tree_rx)

 def test_recom_works_as_a_proposal(partition_with_pop):
     graph = partition_with_pop.graph
@@ -334,7 +541,6 @@ def test_recom_works_as_a_proposal(partition_with_pop):
     for state in chain:
         assert contiguous(state)

-
 def test_reversible_recom_works_as_a_proposal(partition_with_pop):
     random.seed(2018)
     graph = partition_with_pop.graph
@@ -395,8 +601,12 @@ def test_reversible_recom_works_as_a_proposal(partition_with_pop):
     for state in chain:
         assert contiguous(state)

+# frm: TODO: Add more tests using MarkovChain...
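# ---------------------------------------------------------------------
# Editor's note: one possible shape for an additional MarkovChain test, per
# the TODO above. This is a sketch, not a drop-in addition: it reuses the
# partition_with_pop fixture defined in this file and assumes the standard
# single-flip helpers (propose_random_flip, single_flip_contiguous,
# always_accept) are importable in this branch.
# ---------------------------------------------------------------------

from gerrychain import MarkovChain
from gerrychain.accept import always_accept
from gerrychain.constraints import single_flip_contiguous
from gerrychain.proposals import propose_random_flip

def test_chain_preserves_total_population(partition_with_pop):
    total_pop = sum(partition_with_pop["pop"].values())
    chain = MarkovChain(
        proposal=propose_random_flip,
        constraints=[single_flip_contiguous],
        accept=always_accept,
        initial_state=partition_with_pop,
        total_steps=10,
    )
    for state in chain:
        # Flips move nodes between parts; they never create or destroy people.
        assert sum(state["pop"].values()) == total_pop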
def test_find_balanced_cuts_contraction(): + + # frm: TODO: Add test for RX-based Graph object + tree = Graph.from_networkx( networkx.Graph([(0, 1), (1, 2), (1, 4), (3, 4), (4, 5), (3, 6), (6, 7), (6, 8)]) ) @@ -418,50 +628,109 @@ def test_find_balanced_cuts_contraction(): def test_no_balanced_cuts_contraction_when_one_side_okay(): - tree = Graph.from_networkx(networkx.Graph([(0, 1), (1, 2), (2, 3), (3, 4)])) + list_of_nodes_nx = ([(0, 1), (1, 2), (2, 3), (3, 4)]) + + # For this test we are not dealing with an RX-based Graph object + # that is derived fromn an NX-based Graph object, so the + # nx_to_rx_node_id_map can just be the identity map... + # + nx_to_rx_node_id_map = { node: node for node in range(5) } + + tree_nx, tree_rx = \ + create_graphs_from_nx_edges( + list_of_nodes_nx, + nx_to_rx_node_id_map + ) + + # OK to use the same populations for NX and RX graphs populations = {0: 4, 1: 4, 2: 3, 3: 3, 4: 3} - populated_tree = PopulatedGraph( - graph=tree, populations=populations, ideal_pop=10, epsilon=0.1 + populated_tree_nx = PopulatedGraph( + graph=tree_nx, populations=populations, ideal_pop=10, epsilon=0.1 + ) + populated_tree_rx = PopulatedGraph( + graph=tree_rx, populations=populations, ideal_pop=10, epsilon=0.1 ) - cuts = find_balanced_edge_cuts_contraction(populated_tree, one_sided_cut=False) - assert cuts == [] + cuts_nx = find_balanced_edge_cuts_contraction(populated_tree_nx, one_sided_cut=False) + assert cuts_nx == [] + + cuts_rx = find_balanced_edge_cuts_contraction(populated_tree_rx, one_sided_cut=False) + assert cuts_rx == [] def test_find_balanced_cuts_memo(): - tree = Graph.from_networkx( - networkx.Graph([(0, 1), (1, 2), (1, 4), (3, 4), (4, 5), (3, 6), (6, 7), (6, 8)]) - ) - # 0 - 1 - 2 - # || - # 3= 4 - 5 - # || - # 6- 7 - # | - # 8 + list_of_nodes_nx = [(0, 1), (1, 2), (1, 4), (3, 4), (4, 5), (3, 6), (6, 7), (6, 8)] - populated_tree = PopulatedGraph( - tree, {node: 1 for node in tree}, len(tree) / 2, 0.5 + # For this test we are not dealing with an RX-based Graph object + # that is derived fromn an NX-based Graph object, so the + # nx_to_rx_node_id_map can just be the identity map... + # + nx_to_rx_node_id_map = { node: node for node in range(9) } + + tree_nx, tree_rx = \ + create_graphs_from_nx_edges( + list_of_nodes_nx, + nx_to_rx_node_id_map + ) + + # 0 - 1 - 2 + # | + # 4 - 3 + # | | + # 5 6 - 7 + # | + # 8 + + populated_tree_nx = PopulatedGraph( + tree_nx, {node: 1 for node in tree_nx}, len(tree_nx) / 2, 0.5 + ) + populated_tree_rx = PopulatedGraph( + tree_rx, {node: 1 for node in tree_rx}, len(tree_rx) / 2, 0.5 ) - cuts = find_balanced_edge_cuts_memoization(populated_tree) - edges = set(tuple(sorted(cut.edge)) for cut in cuts) - assert edges == {(1, 4), (3, 4), (3, 6)} + + cuts_nx = find_balanced_edge_cuts_memoization(populated_tree_nx) + edges_nx = set(tuple(sorted(cut.edge)) for cut in cuts_nx) + assert edges_nx == {(1, 4), (3, 4), (3, 6)} + + cuts_rx = find_balanced_edge_cuts_memoization(populated_tree_rx) + edges_rx = set(tuple(sorted(cut.edge)) for cut in cuts_rx) + assert edges_rx == {(1, 4), (3, 4), (3, 6)} def test_no_balanced_cuts_memo_when_one_side_okay(): - tree = Graph.from_networkx(networkx.Graph([(0, 1), (1, 2), (2, 3), (3, 4)])) + list_of_nodes_nx = ([(0, 1), (1, 2), (2, 3), (3, 4)]) + + # For this test we are not dealing with an RX-based Graph object + # that is derived fromn an NX-based Graph object, so the + # nx_to_rx_node_id_map can just be the identity map... 
+    #
+    nx_to_rx_node_id_map = {node: node for node in range(5)}
+
+    tree_nx, tree_rx = create_graphs_from_nx_edges(
+        list_of_edges_nx,
+        nx_to_rx_node_id_map,
+    )
+
+    # OK to use the same populations with both NX and RX Graphs
     populations = {0: 4, 1: 4, 2: 3, 3: 3, 4: 3}
-    populated_tree = PopulatedGraph(
-        graph=tree, populations=populations, ideal_pop=10, epsilon=0.1
+    populated_tree_nx = PopulatedGraph(
+        graph=tree_nx, populations=populations, ideal_pop=10, epsilon=0.1
     )
+    populated_tree_rx = PopulatedGraph(
+        graph=tree_rx, populations=populations, ideal_pop=10, epsilon=0.1
+    )
+
+    cuts_nx = find_balanced_edge_cuts_memoization(populated_tree_nx)
+    assert cuts_nx == []

-    cuts = find_balanced_edge_cuts_memoization(populated_tree)
-    assert cuts == []
+    cuts_rx = find_balanced_edge_cuts_memoization(populated_tree_rx)
+    assert cuts_rx == []


 def test_prime_bound():
@@ -473,17 +742,4 @@ def test_prime_bound():
     )


-def test_bipartition_tree_random_returns_a_subset_of_nodes(graph_with_pop):
-    ideal_pop = sum(graph_with_pop.node_data(node)["pop"] for node in graph_with_pop) / 2
-    result = bipartition_tree_random(graph_with_pop, "pop", ideal_pop, 0.25, 10)
-    assert isinstance(result, frozenset)
-    assert all(node in graph_with_pop.nodes for node in result)
-
-def test_bipartition_tree_random_returns_within_epsilon_of_target_pop(graph_with_pop):
-    ideal_pop = sum(graph_with_pop.node_data(node)["pop"] for node in graph_with_pop) / 2
-    epsilon = 0.25
-    result = bipartition_tree_random(graph_with_pop, "pop", ideal_pop, epsilon, 10)
-
-    part_pop = sum(graph_with_pop.node_data(node)["pop"] for node in result)
-    assert abs(part_pop - ideal_pop) / ideal_pop < epsilon
diff --git a/tests/updaters/test_cut_edges.py b/tests/updaters/test_cut_edges.py
index 757ec09b..a308482d 100644
--- a/tests/updaters/test_cut_edges.py
+++ b/tests/updaters/test_cut_edges.py
@@ -30,8 +30,8 @@ def invalid_cut_edges(partition):
 def translate_flips_to_internal_node_ids(partition, flips):
     # Translate flips into the internal_node_ids for the partition
     internal_flips = {}
-    for original_node_id, part in flips.items():
-        internal_node_id = partition.graph.internal_node_id_for_original_node_id(original_node_id)
+    for original_nx_node_id, part in flips.items():
+        internal_node_id = partition.graph.internal_node_id_for_original_nx_node_id(original_nx_node_id)
         internal_flips[internal_node_id] = part
     return internal_flips
diff --git a/tests/updaters/test_perimeters.py b/tests/updaters/test_perimeters.py
index ded769cb..f161246c 100644
--- a/tests/updaters/test_perimeters.py
+++ b/tests/updaters/test_perimeters.py
@@ -13,7 +13,7 @@ def setup():
     grid = Grid((4, 4), with_diagonals=False)
-    flipped_grid = grid.flip({(2, 1): 3}, use_original_node_ids=True)
+    flipped_grid = grid.flip({(2, 1): 3}, use_original_nx_node_ids=True)
     return grid, flipped_grid
@@ -35,6 +35,12 @@ def test_interior_perimeter_handles_flips_with_a_simple_grid():

 def test_cut_edges_by_part_handles_flips_with_a_simple_grid():
+
+    # frm: TODO: Add a graphic here
+    #
+    # That will allow the person reading this code to make sense
+    # of what it does...
+    #
     grid, flipped_grid = setup()
     result = flipped_grid["cut_edges_by_part"]
@@ -45,8 +51,8 @@ def test_cut_edges_by_part_handles_flips_with_a_simple_grid():
     new_set_of_edges = set()
     for edge in set_of_edges:
         new_edge = (
-            flipped_grid.graph.original_node_id_for_internal_node_id(edge[0]),
-            flipped_grid.graph.original_node_id_for_internal_node_id(edge[1]),
+            flipped_grid.graph.original_nx_node_id_for_internal_node_id(edge[0]),
+            flipped_grid.graph.original_nx_node_id_for_internal_node_id(edge[1]),
         )
         new_set_of_edges.add(new_edge)
     new_result[part] = new_set_of_edges
diff --git a/tests/updaters/test_splits.py b/tests/updaters/test_splits.py
index 1bea1bff..81e8b7b2 100644
--- a/tests/updaters/test_splits.py
+++ b/tests/updaters/test_splits.py
@@ -43,7 +43,7 @@ def test_describes_splits_for_all_counties(self, partition):
     assert set(result.keys()) == {"a", "b", "c"}

-    after_a_flip = partition.flip({3: 1}, use_original_node_ids=True)
+    after_a_flip = partition.flip({3: 1}, use_original_nx_node_ids=True)
     second_result = after_a_flip["splits"]
     assert set(second_result.keys()) == {"a", "b", "c"}
@@ -61,7 +61,7 @@ def test_no_splits(self, graph_with_counties):
 def test_new_split(self, partition):
     # Do a flip, using the node_ids of the original assignment (rather than the
     # node_ids used internally in the RX-based graph)
-    after_a_flip = partition.flip({3: 1}, use_original_node_ids=True)
+    after_a_flip = partition.flip({3: 1}, use_original_nx_node_ids=True)
     result = after_a_flip["splits"]
     # County b is now split, but a and c are not
@@ -80,7 +80,7 @@ def test_initial_split(self, split_partition):
 def test_old_split(self, split_partition):
     # Do a flip, using the node_ids of the original assignment (rather than the
     # node_ids used internally in the RX-based graph)
-    after_a_flip = split_partition.flip({4: 1}, use_original_node_ids=True)
+    after_a_flip = split_partition.flip({4: 1}, use_original_nx_node_ids=True)
     result = after_a_flip["splits"]
     # County b becomes more split
@@ -93,11 +93,11 @@ def test_old_split(self, split_partition):
     "previous partition, which is not the intuitive behavior."
 )
 def test_initial_split_that_disappears_and_comes_back(self, split_partition):
-    no_splits = split_partition.flip({3: 2}, use_original_node_ids=True)
+    no_splits = split_partition.flip({3: 2}, use_original_nx_node_ids=True)
     result = no_splits["splits"]
     assert all(info.split == CountySplit.NOT_SPLIT for info in result.values())

-    split_comes_back = no_splits.flip({3: 1}, use_original_node_ids=True)
+    split_comes_back = no_splits.flip({3: 1}, use_original_nx_node_ids=True)
     new_result = split_comes_back["splits"]
     assert new_result["a"].split == CountySplit.NOT_SPLIT
     assert new_result["b"].split == CountySplit.OLD_SPLIT
diff --git a/tests/updaters/test_updaters.py b/tests/updaters/test_updaters.py
index f560d717..107ec40a 100644
--- a/tests/updaters/test_updaters.py
+++ b/tests/updaters/test_updaters.py
@@ -33,60 +33,23 @@ def random_assignment(graph, num_districts):
 def partition_with_election(graph_with_d_and_r_cols):
     graph = graph_with_d_and_r_cols
     assignment = random_assignment(graph, 3)
-    """
-    # frm: TODO: NX vs RX Issue here - node_ids in parties_to_columns are in NX context...
-
-    This is an "interesting" issue - mostly meaning it is a PITA.
-
-    The Election class allows you to specify what you want to tally as either a node
-    attribute or with an explicit external mapping of node_ids to values to be added.
-
-    The problem is that if you pass in an explicit external mapping, you are almost
-    certainly using node_ids that are the "original" NX node_ids which will NOT be
-    the same as the new RX node_ids assigned when a partition is created.
-
-    Note that if you just pass in an attribute name to get the data off the node
-    then there is no problem - the Tally code just uses the partition's part (district)
-    and nodes in the part (district) information. No translation to/from original
-    node_ids necessary.
-
-    One approach to fixing this would be to just assume that any explicit mapping
-    should have the node_ids remapped after the partition is created. This could
-    be done by having the Election class defer doing the tally until AFTER the
-    partition has been created - the code would check to see if the tally exists,
-    and if it does not, then it would use the partition's information to
-    translate the parties_to_columns data to use internal node_ids and compute
-    the initial tally. After that, subsequent tallies for the next partition in
-    the chain should just work...
-
-    I am just not sure it is worth the extra complexity to continue to support
-    an explicit external mapping of node_ids to vote totals...
-
-    Need to ask Peter what he thinks we should do. Do external / legacy users
-    use this???
-
-    """
-    #
-    # This is a royal pain, because we do not yet have a partition that tells us how
-    # to map these "original" node_ids into "internal" node_ids.
-    #
-    # Even worse, this is a conceptual problem, since this use case - setting up an
-    # Election before creating a partition is perhaps a common use case, so we don't
-    # want to make it complicated for users.
+
+    # Original code - replaced when we converted to using RustworkX-based
+    # graphs in partitions. The problem was that the node_ids used below
+    # are NX-based "original" node_ids that are not valid for the derived
+    # RustworkX-based graph.
     #
-    # Need to think about what the proper solution is. Should the Election updater
-    # translate from "original" node_ids to "internal" node_ids - perhaps keeping a
-    # cache of the mapping to make it more efficient?
+    #    party_names_to_node_attribute_names = {
+    #        "D": {node: graph.node_data(node)["D"] for node in graph.nodes},
+    #        "R": {node: graph.node_data(node)["R"] for node in graph.nodes},
+    #    }
     #
-    # What would the migration path be for 1) legacy NX users and 2) future RX users?
+    # Instead we do the functionally equivalent operation, which identifies where
+    # to find vote totals for each party using the attribute name for the party.
     #
+    party_names_to_node_attribute_names = ["D", "R"]

-    parties_to_columns = {
-        "D": {node: graph.node_data(node)["D"] for node in graph.nodes},
-        "R": {node: graph.node_data(node)["R"] for node in graph.nodes},
-    }
-
-    election = Election("Mock Election", parties_to_columns)
+    election = Election("Mock Election", party_names_to_node_attribute_names)
     updaters = {"Mock Election": election, "cut_edges": cut_edges}
     return Partition(graph, assignment, updaters)
@@ -127,12 +90,12 @@ def test_Partition_can_update_stats():
     # Flip node with original node_id of 1 to be in part (district) 2
     flip = {1: 2}
-    new_partition = partition.flip(flip, use_original_node_ids=True)
+    new_partition = partition.flip(flip, use_original_nx_node_ids=True)

     assert new_partition["total_stat"][2] == 9


-def test_tally_multiple_columns(graph_with_d_and_r_cols):
+def test_tally_multiple_node_attribute_names(graph_with_d_and_r_cols):
     graph = graph_with_d_and_r_cols

     updaters = {"total": Tally(["D", "R"], alias="total")}
@@ -169,7 +132,7 @@ def test_vote_proportion_returns_nan_if_total_votes_is_zero(three_by_three_grid)
     graph = three_by_three_grid
     for node in graph.nodes:
-        for col in election.columns:
+        for col in election.node_attribute_names:
             graph.node_data(node)[col] = 0

     updaters = {"election": election}
@@ -334,8 +297,6 @@ def test_exterior_boundaries_as_a_set(three_by_three_grid):

 def test_exterior_boundaries(three_by_three_grid):
-    # frm: TODO: Need to deal with NX vs. RX node_ids here - look at the other test_exterior_boundaries test
-
     graph = three_by_three_grid

     for i in [0, 1, 2, 3, 5, 6, 7, 8]:
@@ -361,7 +322,7 @@ def test_exterior_boundaries(three_by_three_grid):
     # Convert the flips into internal node_ids
     internal_flips = {}
     for node_id, part in flips.items():
-        internal_node_id = partition.graph.internal_node_id_for_original_node_id(node_id)
+        internal_node_id = partition.graph.internal_node_id_for_original_nx_node_id(node_id)
         internal_flips[internal_node_id] = part

     new_partition = Partition(parent=partition, flips=internal_flips)
@@ -382,20 +343,6 @@ def test_perimeter(three_by_three_grid):
     for edge in graph.edges:
         graph.edge_data(edge)["shared_perim"] = 1

-    """
-    frm: TODO: BIG bug/issue here - assignments break when converting to RX
-
-    The problem is that RX renumbers nodes when it converts an NX graph to RX. It
-    does this so that it can be sure that there are no gaps - and also because sometimes
-    node_ids in NX are not integers. In any event, that means that any assignment
-    for a Partition needs to have its node_ids (from NX) converted to be whatever RX
-    decided to use for the new node_ids.
-
-    I am not sure how to do this, because it does not appear that RX saves the NX
-    node_ids. Need to check that, though...
-
-    HMMMMM....
-    """
     assignment = {0: 1, 1: 1, 2: 2, 3: 1, 4: 1, 5: 2, 6: 2, 7: 2, 8: 2}
     updaters = {
         "exterior_boundaries": exterior_boundaries,
@@ -424,21 +371,6 @@ def reject_half_of_all_flips(partition):

 def test_elections_match_the_naive_computation(partition_with_election):
-    # frm: TODO: This test fails - find out why.
- - """ - The pytest output follows: - - File "/Users/fred/Documents/_play/_python/_redistricting/_gerrychain/_rustworkx_work/GerryChain/tests/updaters/test_updaters.py", line 391, in test_elections_match_the_naive_computation - assert expected_party_totals == election_view.totals_for_party -AssertionError: assert {'D': {0: 119...2268, 2: 162}} == {'D': {0: 119...: 2430, 2: 0}} - - Differing items: - {'D': {0: 1191, 1: 2946, 2: 152}} != {'D': {0: 1191, 1: 3098, 2: 0}} - {'R': {0: 1171, 1: 2268, 2: 162}} != {'R': {0: 1171, 1: 2430, 2: 0}} - - """ - chain = MarkovChain( propose_random_flip, Validator([no_vanishing_districts, reject_half_of_all_flips]), @@ -456,8 +388,8 @@ def test_elections_match_the_naive_computation(partition_with_election): assert expected_party_totals == election_view.totals_for_party -def expected_tally(partition, column): +def expected_tally(partition, node_attribute_name): return { - part: sum(partition.graph.node_data(node)[column] for node in nodes) + part: sum(partition.graph.node_data(node)[node_attribute_name] for node in nodes) for part, nodes in partition.parts.items() }