diff --git a/demo_sequencer.py b/demo_sequencer.py
index 72f483d..b53b72e 100644
--- a/demo_sequencer.py
+++ b/demo_sequencer.py
@@ -1,11 +1,11 @@
 from sequencer import NetworkPlan
 from sequencer.Models import EnergyMaximizeReturn
 
-csv = '/Users/blogle/Downloads/3305/metrics-local.csv'
-shp = '/Users/blogle/Downloads/3305/networks-proposed.shp'
+csv = 'data/sumaila/input/metrics-local.csv'
+shp = 'data/sumaila/input/networks-proposed.shp'
 
-nwp = NetworkPlan(shp, csv, prioritize='Population')
+nwp = NetworkPlan.from_files(shp, csv, prioritize='Population')
 model = EnergyMaximizeReturn(nwp)
 results = model.sequence()
 
-model.output('/Users/blogle/Desktop/output/')
+model.output('output')
diff --git a/sequencer/Models.py b/sequencer/Models.py
index a604dce..1c4a991 100644
--- a/sequencer/Models.py
+++ b/sequencer/Models.py
@@ -12,3 +12,5 @@ def _strip_cols(self):
     def sequence(self):
         super(EnergyMaximizeReturn, self).sequence()
         self._strip_cols()
+        # return to be consistent with parent
+        return self.output_frame
diff --git a/sequencer/NetworkPlan.py b/sequencer/NetworkPlan.py
index 275a424..ed00259 100644
--- a/sequencer/NetworkPlan.py
+++ b/sequencer/NetworkPlan.py
@@ -4,8 +4,6 @@
 import fiona
 import numpy as np
 import networkx as nx
-from scipy.sparse import csr_matrix
-import scipy.sparse.csgraph as graph
 import pandas as pd
 import logging
 import copy
@@ -19,31 +17,32 @@ class NetworkPlan(object):
     NetworkPlan containing NetworkPlanner proposed network and
     accompanying nodal metrics
 
-    Parameters
-    ----------
-    shp : file or string (File, directory, or filename to read).
-    csv : string or file handle / StringIO.
-
-    Example
-    ----------
-    NetworkPlan('/Users/blogle/Downloads/1643/networks-proposed.shp',
-                '/Users/blogle/Downloads/1643/metrics-local.csv')
     """
 
     TOL = .5 # meters at the equator, tolerance is stricter towards the poles
 
-    def __init__(self, shp, csv, **kwargs):
-        self.shp_p, self.csv_p = shp, csv
-        self.priority_metric = kwargs['prioritize'] if 'prioritize' in kwargs else 'population'
+    def __init__(self, network, metrics, **kwargs):
+        self.priority_metric = kwargs.get('prioritize', 'population')
+        self.proj = kwargs.get('proj', 'utm')
 
-        logger.info('Asserting Input Projections Match')
-        self._assert_proj_match(shp, csv)
+        # FIXME:
+        # Remove the dependency that sequencer has on the
+        # original metrics file (this is terrible coupling)
+        # see sequencer:_clean_results()
+        self._original_metrics = metrics
+
+        self._init_helper(network, metrics)
+
+
+    def _init_helper(self, network, metrics):
+        """
+        All initialization (cleaning up metrics, network, etc)
+        """
 
         # Load in and align input data
         logger.info('Aligning Network Nodes With Input Metrics')
-        self._network, self._metrics = prep_data( nx.read_shp(shp),
-                                                  pd.read_csv(csv, header=1),
-                                                  loc_tol = self.TOL
-                                                )
+        self._network, self._metrics = prep_data(network,
+                                                 metrics,
+                                                 loc_tol = self.TOL)
 
         self.coord_values = self.coords.values()
@@ -63,6 +62,33 @@ def __init__(self, shp, csv, **kwargs):
         #Fillna values with Zero
         self._metrics = self.metrics.fillna(0)
 
+
+    @classmethod
+    def from_files(cls, shp, csv, **kwargs):
+        """
+        Parameters
+        ----------
+        shp : file or string (File, directory, or filename to read).
+        csv : string or file handle / StringIO.
+
+        Example
+        ----------
+        NetworkPlan.from_files('networks-proposed.shp',
+                               'metrics-local.csv')
+        """
+
+        logger.info('Asserting Input Projections Match')
+
+        cls._assert_proj_match(shp, csv)
+        # Use fiona to open the shapefile as this includes the projection type
+
+        shapefile = fiona.open(shp)
+        # Pass along the projection
+        kwargs['proj'] = shapefile.crs['proj']
+
+        return cls(nx.read_shp(shp), pd.read_csv(csv, header=1), **kwargs)
+
+    @classmethod
     def _assert_proj_match(self, shp, csv):
         """Ensure that the projections match before continuing"""
         # Use fiona to open the shapefile as this includes the projection type
@@ -83,9 +109,8 @@ def _assert_proj_match(self, shp, csv):
             logger.error("csv and shp Projections Don't Match")
             raise AssertionError("csv and shapefile Projections Don't Match")
 
-        # Save the state of the projection
-        self.proj = shapefile.crs['proj']
-
+
+
 
     def assert_is_tree(self):
         in_degree = self.network.in_degree()
@@ -254,12 +279,16 @@ def network(self):
         """returns the DiGraph Object representation of the graph"""
         return self._network
 
+    @property
+    def original_metrics(self):
+        """returns the original (unprocessed) metrics data_frame"""
+        return self._original_metrics
+
     @property
     def metrics(self):
         """returns the nodal metrics Pandas DataFrame"""
         return self._metrics
 
-
 
 def download_scenario(scenario_number, directory_name=None, username=None, password=None,
                       np_url='http://networkplanner.modilabs.org/'):
@@ -322,4 +351,4 @@ def write_file(name):
     csv = os.path.join(directory_name, 'metrics-local.csv')
     shp = os.path.join(directory_name, 'network-proposed.shp')
 
-    return NetworkPlan(shp, csv)
+    return NetworkPlan.from_files(shp, csv)
diff --git a/sequencer/Sequencer.py b/sequencer/Sequencer.py
index b79bb62..24ece15 100644
--- a/sequencer/Sequencer.py
+++ b/sequencer/Sequencer.py
@@ -127,13 +127,18 @@ def _sequence(self):
     def upstream_distance(self, node):
         """Computes the edge distance from a node to it's parent"""
         parent = self.parent(node)
-        if parent != None:
+        if parent is not None:
             return self.networkplan._distance(parent, node)
         return 0.0
 
     def sequence(self):
-        self.results = pd.DataFrame(self._sequence()).set_index('Sequence..Far.sighted.sequence')
+        """
+        Compute the sequence (aka rank) of nodes and edges
+        This modifies the NetworkPlan member (so make a deep copy if you
+        need the original)
+        """
+        self.results = pd.DataFrame(self._sequence(), dtype=object).set_index('Sequence..Far.sighted.sequence')
         # Post process for output
         self._build_node_wkt()
         self._build_edge_wkt()
@@ -235,7 +240,7 @@ def _build_edge_wkt(self):
         r = self.results
         # Iterate through the nodes and their parent
         for rank, fnode, tnode in zip(r.index, r['Sequence..Upstream.id'], r['Sequence..Vertex.id']):
-            if not np.isnan(fnode):
+            if fnode is not None:
                 # Set the edge attributes with those found in sequencing
                 self.networkplan.network.edge[fnode][tnode]['rank'] = int(rank)
                 self.networkplan.network.edge[fnode][tnode]['distance'] = float(self.networkplan._distance(fnode, tnode))
@@ -289,8 +294,8 @@ def _clean_results(self):
         """This joins the sequenced results on the metrics dataframe and reappends the
         dropped rows"""
         logger.info('Joining Sequencer Results on Input Metrics')
-
-        orig = pd.read_csv(self.networkplan.csv_p, header=1)
+        # FIXME: Remove this dependency on original_metrics
+        orig = self.networkplan.original_metrics
         orig.columns = parse_cols(orig)
         self.networkplan.metrics.index.name = 'Sequence..Vertex.id'
         sequenced_metrics = pd.merge(self.networkplan.metrics.reset_index(),
                                     self.results.reset_index(), on='Sequence..Vertex.id')
diff --git a/sequencer/Tests/Test_Suite.py b/sequencer/Tests/Test_Suite.py
index c60eb84..f27d3ad 100644
--- a/sequencer/Tests/Test_Suite.py
+++ b/sequencer/Tests/Test_Suite.py
@@ -46,6 +46,90 @@ def gen_data():
     return metrics, network.to_directed()
 
 
+
+def gen_data_with_fakes():
+    """
+    generate network and metrics where some of the network
+    nodes do not have corresponding metrics records
+
+    This should be sufficient for tests requiring fake nodes
+
+    network looks like (fake node starred, demand in parens)
+
+                      6*
+                      |
+                      |
+      0(100)        3(12)
+       /  \          /  \
+      /    \        /    \
+    1(50)  2(25)  4(6)  5(3)
+
+    Also returns edge_rank:  dict of edge -> rank
+    """
+
+    # create disjoint graph with 2 trees, one rooted by a fake node
+    network = nx.graph.Graph()
+    edges = ((0, 1), (0, 2), (3, 4), (3, 5))
+    network.add_edges_from(edges)
+
+    # now add fake root to tree at 3
+    network.add_edge(6, 3)
+
+    # set coordinates (roughly match diagram above)
+    base_coord = np.array([10, 10])
+    fake_coord = np.array([20, 9])
+    coord_dict = {0: base_coord,
+                  1: base_coord + [-1, 1],
+                  2: base_coord + [1, 1],
+                  3: fake_coord + [0, 1],
+                  4: fake_coord + [-1, 2],
+                  5: fake_coord + [1, 2],
+                  6: fake_coord}
+
+    nx.set_node_attributes(network, 'coords', coord_dict)
+    # now set the metrics dataframe without the fake node
+    metrics_data = {'Demand...Projected.nodal.demand.per.year':
+                    [100, 50, 25, 12, 6, 3],
+                    'Population': [100, 50, 25, 12, 6, 3]}
+
+    metrics = DataFrame(metrics_data)
+    # Note, we skip fake node here
+    metrics['X'] = [ coord_dict[i][0] for i in range(6) ]
+    metrics['Y'] = [ coord_dict[i][1] for i in range(6) ]
+
+    # assign expected ranks to nodes, edges (the sequence)
+    # note:
+    # - ranks are 1-based and originally assigned to nodes
+    # - edges are assigned rank based on the "to" node
+    # - fake nodes are skipped when assigning rank
+    #   (See Sequencer.sequencer._sequence for details)
+    node_rank = {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6}
+    edge_rank = {(0, 1): 2, (0, 2): 3, (6, 3): 4, (3, 4): 5, (3, 5): 6}
+    return metrics, network, node_rank, edge_rank
+
+
+def test_sequencer_with_fakes():
+    """
+    Make sure we work with fake nodes
+    """
+
+    # for now, just make sure it runs without exceptions
+    metrics, network, node_rank, edge_rank = gen_data_with_fakes()
+    nwp = NetworkPlan(network, metrics, prioritize='Population', proj='wgs4')
+    model = EnergyMaximizeReturn(nwp)
+    results = model.sequence()
+
+    node_ids = results['Sequence..Vertex.id']
+    sequence_ids = results['Sequence..Far.sighted.sequence']
+    actual_node_rank = dict(zip(node_ids, sequence_ids))
+    actual_edge_rank = {k: v['rank'] for k, v in
+                        model.networkplan.network.edge.iteritems()}
+    assert node_rank == actual_node_rank,\
+        "Node sequencing is not what was expected"
+    assert edge_rank == actual_edge_rank,\
+        "Edge sequencing is not what was expected"
+
+
 class TestNetworkPlan(NetworkPlan):
 
     def __init__(self):
@@ -144,7 +228,7 @@ def test_sequencer_compare():
     input_dir = "data/sumaila/input"
     csv_file = os.path.join(input_dir, "metrics-local.csv")
     shp_file = os.path.join(input_dir, "networks-proposed.shp")
-    nwp = NetworkPlan(shp_file, csv_file, prioritize='Population')
+    nwp = NetworkPlan.from_files(shp_file, csv_file, prioritize='Population')
     model = EnergyMaximizeReturn(nwp)
     model.sequence()
diff --git a/sequencer/Utils.py b/sequencer/Utils.py
index e2ae2a9..7e772f8 100644
--- a/sequencer/Utils.py
+++ b/sequencer/Utils.py
@@ -7,6 +7,7 @@
 import numpy as np
 from numpy import sin, cos, pi, arcsin, sqrt
 import string
+import collections
 
 def prep_data(network, metrics, loc_tol=.5):
     """
@@ -15,7 +16,12 @@
     """
 
     # convert the node names from coords to integers, cache the coords as attrs
-    network = nx.convert_node_labels_to_integers(network, label_attribute='coords')
+    # but ONLY if the nodes are themselves collections (which is the default for
+    # networkx shapefile import)
+    # otherwise, assume the coords attribute exists
+    if(len(network.nodes()) > 0 and
+       isinstance(network.nodes()[0], collections.Iterable)):
+        network = nx.convert_node_labels_to_integers(network, label_attribute='coords')
 
     # convert special characters to dot notation
     metrics.columns = parse_cols(metrics)
@@ -28,7 +34,6 @@
 
     # cast coords to tuples (hashable)
     node_df['coords'] = node_df['coords'].apply(tuple)
-    metrics['m_coords'] = metrics['m_coords'].apply(tuple)
 
     # build a vector of all the coordinates in the metrics dataframe
     coords_vec = np.vstack(metrics['m_coords'].values)
@@ -46,7 +51,6 @@ def fuzzy_match(coord):
 
     # cast the coordinates back to tuples (hashable)
     node_df['m_coords'] = node_df['m_coords'].apply(tuple)
-    metrics['m_coords'] = metrics['m_coords'].apply(tuple)
 
     # now that we have identical metric coords in both node_df and metrics join on that column
     metrics = pd.merge(metrics, node_df, on='m_coords', left_index=True).sort()
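
Usage after this change, for reference. This is a minimal sketch drawn from demo_sequencer.py and test_sequencer_with_fakes above, not part of the patch; the node coordinates, metric values, and the 'utm' projection passed to the constructor are illustrative.

# Sketch only: mirrors demo_sequencer.py and the new test; values below are illustrative.
import networkx as nx
import pandas as pd

from sequencer import NetworkPlan
from sequencer.Models import EnergyMaximizeReturn

# File-based inputs now go through the classmethod, which checks that the
# shapefile and csv projections match before constructing the plan.
nwp = NetworkPlan.from_files('data/sumaila/input/networks-proposed.shp',
                             'data/sumaila/input/metrics-local.csv',
                             prioritize='Population')

# In-memory inputs can be passed straight to the constructor, as the new test
# does: a graph whose nodes carry 'coords' attributes and a metrics DataFrame
# with matching X/Y columns; the projection is supplied explicitly.
network = nx.graph.Graph()
network.add_edges_from([(0, 1), (0, 2)])
nx.set_node_attributes(network, 'coords', {0: (10, 10), 1: (9, 11), 2: (11, 11)})
metrics = pd.DataFrame({'Demand...Projected.nodal.demand.per.year': [100, 50, 25],
                        'Population': [100, 50, 25],
                        'X': [10, 9, 11],
                        'Y': [10, 11, 11]})
nwp_in_memory = NetworkPlan(network, metrics, prioritize='Population', proj='utm')

model = EnergyMaximizeReturn(nwp)
results = model.sequence()   # sequence() now returns a frame (see the Models.py change)
model.output('output')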