summary | refs | log | tree | commit | diff | stats
path: root/osm_parser.py
diff options
context:
space:
mode:
author Gustav Sörnäs <gusso230@student.liu.se> 2020-12-03 14:44:41 +0100
committer Gustav Sörnäs <gusso230@student.liu.se> 2020-12-03 14:44:41 +0100
commit e15617848b6837216a8ceed4ca23b3bb415a42f7 (patch)
tree 88da2cf0cfa7ef7901f24e8a58246b4b4254387b /osm_parser.py
parent 3e4015d2c06935b9c2c3da92f586df02c1b903d2 (diff)
download tdde25-e15617848b6837216a8ceed4ca23b3bb415a42f7.tar.gz
implement iterparse [branch: xml-iterparse]
Diffstat (limited to 'osm_parser.py')
-rw-r--r-- osm_parser.py 81
1 files changed, 43 insertions, 38 deletions
diff --git a/osm_parser.py b/osm_parser.py
index 60754c9..7145726 100644
--- a/osm_parser.py
+++ b/osm_parser.py
@@ -57,9 +57,9 @@ OUT_TAGS_KEY = "tags"
class OSMParser:
"""
- A Parser that parses OSM data and enables iterating over
+ A Parser that parses OSM data and enables iterating over
Nodes and Ways as well as selecting which tags to show from the data.
-
+
One of the focuses of the parser has been to create something simple
that abstracts away as much object oriented features as possible from a normal user
@@ -67,69 +67,74 @@ class OSMParser:
in the XML file into a python dictionary. (since this key-value stuff is basically what xml is)
"""
-
def __init__(self, fname, all_way_tags):
- """
+ """
Initialize. if all_way_tags is True then the parser will return ways
with all tags, otherwise it will only choose specific tags
"""
- if not FILEEXT in fname:
+ if not fname.endswith(FILEEXT):
fname = fname + FILEEXT
- self.tree = ET.parse(fname) # ET, parse home
+ self.fname = fname
self.node_tags = set()
self.way_tags = set()
self.allow_all = all_way_tags
-
def add_node_tag(self, tag):
""" Adds a tag to be searched for when looking at nodes """
self.node_tags.add(tag)
-
def add_way_tag(self, tag):
""" Adds a tag to be searched for when looking at ways (edges) """
self.way_tags.add(tag)
-
def iter_nodes(self):
- """
+ """
Iterator-object for all nodes in osm-tree.
Returns a dictionary with the (manually) added tags
"""
- # Root of xml tree
- root = self.tree.getroot()
- for node in root.iter(OSM_NODE):
-
- # yield a dictionary with all tags that are added to self.node_tags
- yield { tag : node.attrib.get(tag, None)
- for tag in self.node_tags
- if tag in node.attrib }
-
+ for ev, el in ET.iterparse(self.fname, events=("start", "end")):
+ if ev == "start" and el.tag == "osm":
+ root = el
+ elif ev == "end" and el.tag == "node":
+ yield {k: v for k, v in el.items()}
+ if ev == "end":
+ if el in root:
+ root.remove(el)
+ el.clear()
def iter_ways(self):
- """
+ """
Iterator-object for all ways (edges) in osm-tree.
Returns a dictionary with the ways
"""
- # Root of xml-tree
- root = self.tree.getroot()
- for way in root.iter(OSM_WAY):
-
- # Take out roads and tags
- road = tuple( node.attrib[OSM_NODE_REFERENCE]
- for node in way.iter(OSM_WAYNODE) )
- tags = { tag.attrib[OSM_TAG_KEY] : tag.attrib[OSM_TAG_VALUE]
- for tag in way.iter(OSM_TAG)
- if self.allow_all or tag.attrib[OSM_TAG_KEY] in self.way_tags}
-
- # Yield the edge id, the road and the tags
- yield {
- OUT_EDGE_ID : way.attrib[OSM_WAY_ID],
- OUT_EDGES_KEY : road,
- OUT_TAGS_KEY : tags,
- }
-
+ reading_way = False
+ for ev, el in ET.iterparse(self.fname, events=("start", "end")):
+ if ev == "start" and el.tag == "osm":
+ root = el
+ elif ev == "start" and el.tag == "way":
+ reading_way = True
+ elif ev == "end" and el.tag == "way":
+ reading_way = False
+ # Take out roads and tags
+ road = tuple(node.get(OSM_NODE_REFERENCE)
+ for node in el.iter(OSM_WAYNODE))
+ tags = {tag.get(OSM_TAG_KEY): tag.get(OSM_TAG_VALUE)
+ for tag in el.iter(OSM_TAG)
+ if self.allow_all or tag.get(OSM_TAG_KEY) in self.way_tags}
+
+ # Yield the edge id, the road and the tags
+ yield {
+ OUT_EDGE_ID: el.get(OSM_WAY_ID),
+ OUT_EDGES_KEY: road,
+ OUT_TAGS_KEY: tags,
+ }
+
+ if ev == "end" and not reading_way:
+ if el in root:
+ root.remove(el)
+ el.clear()
+
##########################################
# Example Usage and Default Getter #