diff --git a/flex-config/README.md b/flex-config/README.md index 5bd85e61b..0a2b9a256 100644 --- a/flex-config/README.md +++ b/flex-config/README.md @@ -17,6 +17,8 @@ following order (from easiest to understand to the more complex ones): After that you can dive into more advanced topics: +* [public-transport.lua](public-transport.lua) -- Use multi-stage processing + to bring tags from public transport relations to member nodes and ways * [route-relations.lua](route-relations.lua) -- Use multi-stage processing to bring tags from relations to member ways * [unitable.lua](unitable.lua) -- Put all OSM data into a single table diff --git a/flex-config/public-transport.lua b/flex-config/public-transport.lua new file mode 100644 index 000000000..a0b97539a --- /dev/null +++ b/flex-config/public-transport.lua @@ -0,0 +1,224 @@ +-- This config example file is released into the Public Domain. + +-- This file shows how to use multi-stage processing to bring tags from +-- public transport relations into member nodes and ways. This allows +-- advanced processing of public transport networks including stops. + +-- Nodes tagged as public transport stops are imported into the 'stops' table, +-- if they are part of a public transport relation. Ways tagged as highway or +-- railway are imported into the 'lines' table. The public transport routes +-- themselves will be in the 'routes' table, but without any geometry. As a +-- "bonus" public transport stop area relations will be imported into the +-- 'stop_areas' table. +-- +-- For the 'stops' and 'lines' tables two-stage processing is used. The +-- 'rel_refs' text column will contain a list of all ref tags found in parent +-- relations with type=route and route=bus, tram, etc. The 'rel_ids' column +-- will be an integer array containing the relation ids. These could be used, +-- for instance, to look up other relation tags from the 'routes' table. 
+ +local tables = {} + +tables.stops = osm2pgsql.define_node_table('stops', { + { column = 'tags', type = 'jsonb' }, + { column = 'rel_refs', type = 'text' }, -- refs of all parent route relations, joined into one string + { column = 'rel_ids', sql_type = 'int8[]' }, -- ids of the parent route relations (integer array) + { column = 'geom', type = 'point', not_null = true }, +}) + +tables.lines = osm2pgsql.define_way_table('lines', { + { column = 'tags', type = 'jsonb' }, + { column = 'rel_refs', type = 'text' }, -- refs of all parent route relations, joined into one string + { column = 'rel_ids', sql_type = 'int8[]' }, -- ids of the parent route relations (integer array) + { column = 'geom', type = 'linestring', not_null = true }, +}) + +-- Tables don't have to have a geometry column; 'routes' only stores attributes +tables.routes = osm2pgsql.define_relation_table('routes', { + { column = 'ref', type = 'text' }, + { column = 'type', type = 'text' }, -- value of the 'route' tag (bus, tram, ...) + { column = 'from', type = 'text' }, + { column = 'to', type = 'text' }, + { column = 'tags', type = 'jsonb' }, +}) + +-- Stop areas contain everything belonging to a specific public transport +-- stop. We model them here by adding a center point as geometry plus the +-- radius of a circle that contains everything in that stop. +tables.stop_areas = osm2pgsql.define_relation_table('stop_areas', { + { column = 'tags', type = 'jsonb' }, + { column = 'radius', type = 'real', not_null = true }, -- bounding box diagonal in EPSG:3857 units + { column = 'geom', type = 'point', not_null = true }, -- centroid of the relation's member geometries +}) + +-- This will be used to store information about relations queryable by member +-- node/way id. These are tables of tables. The outer table is indexed by the +-- node/way id, the inner table indexed by the relation id. This way even if +-- the information about a relation is added twice, it will be in there only +-- once. It is always good to write your osm2pgsql Lua code in an idempotent +-- way, i.e. it can be called any number of times and will lead to the same +-- result. 
+local n2r = {} +local w2r = {} + +local function clean_tags(tags) + tags.odbl = nil + tags.created_by = nil + tags.source = nil + tags['source:ref'] = nil + + return next(tags) == nil +end + +local function unique_array(array) + local result = {} + + local last = nil + for _, v in ipairs(array) do + if v ~= last then + result[#result + 1] = v + last = v + end + end + + return result +end + +local separator = '·' -- use middle dot as separator character + +local function add_rel_data(row, d) + if not d then + return + end + + local refs = {} + local ids = {} + for rel_id, rel_ref in pairs(d) do + refs[#refs + 1] = rel_ref + ids[#ids + 1] = rel_id + end + table.sort(refs) + table.sort(ids) + + row.rel_refs = table.concat(unique_array(refs), separator) + row.rel_ids = '{' .. table.concat(unique_array(ids), ',') .. '}' +end + +function osm2pgsql.process_node(object) + -- We are only interested in public transport stops here, and they are + -- only available in the second stage. + if osm2pgsql.stage ~= 2 then + return + end + + local row = { + tags = object.tags, + geom = object:as_point() + } + + -- If there is any data from parent relations, add it in + add_rel_data(row, n2r[object.id]) + + tables.stops:insert(row) +end + +function osm2pgsql.process_way(object) + -- We are only interested in highways and railways + if not object.tags.highway and not object.tags.railway then + return + end + + clean_tags(object.tags) + + -- Data we will store in the 'lines' table always has the tags from + -- the way + local row = { + tags = object.tags, + geom = object:as_linestring() + } + + -- If there is any data from parent relations, add it in + add_rel_data(row, w2r[object.id]) + + tables.lines:insert(row) +end + +local pt = { + bus = true, + light_rail = true, + subway = true, + tram = true, + trolleybus = true, +} + +-- We are only interested in certain route relations with a ref tag +local function wanted_relation(tags) + return tags.type == 'route' and pt[tags.route] and 
tags.ref +end + +-- This function is called for every added, modified, or deleted relation. +-- Its only job is to return the ids of all member nodes/ways of the specified +-- relation we want to see in stage 2 again. It MUST NOT store any information +-- about the relation! +function osm2pgsql.select_relation_members(relation) + -- Only interested in public transport relations with refs + if wanted_relation(relation.tags) then + local node_ids = {} + local way_ids = {} + + for _, member in ipairs(relation.members) do + if member.type == 'n' and member.role == 'stop' then + node_ids[#node_ids + 1] = member.ref + elseif member.type == 'w' and member.role == '' then + way_ids[#way_ids + 1] = member.ref + end + end + + return { + nodes = node_ids, + ways = way_ids, + } + end +end + +-- The process_relation() function should store all information about relation +-- members that might be needed in stage 2. +function osm2pgsql.process_relation(object) + if object.tags.type == 'public_transport' and object.tags.public_transport == 'stop_area' then + local x1, y1, x2, y2 = object:as_geometrycollection():transform(3857):get_bbox() + local radius = math.sqrt((x2-x1)*(x2-x1) + (y2-y1)*(y2-y1)) + tables.stop_areas:insert({ + tags = object.tags, + geom = object:as_geometrycollection():centroid(), + radius = radius, + }) + return + end + + if wanted_relation(object.tags) then + tables.routes:insert({ + type = object.tags.route, + ref = object.tags.ref, + from = object.tags.from, + to = object.tags.to, + tags = object.tags, + }) + + -- Go through all the members and store relation ids and refs so they + -- can be found by the member node/way id. 
+ for _, member in ipairs(object.members) do + if member.type == 'n' then + if not n2r[member.ref] then + n2r[member.ref] = {} + end + n2r[member.ref][object.id] = object.tags.ref + elseif member.type == 'w' then + if not w2r[member.ref] then + w2r[member.ref] = {} + end + w2r[member.ref][object.id] = object.tags.ref + end + end + end +end + diff --git a/src/init.lua b/src/init.lua index 26571cb5f..9d0c76413 100644 --- a/src/init.lua +++ b/src/init.lua @@ -46,6 +46,16 @@ function osm2pgsql.define_area_table(_name, _columns, _options) return _define_table_impl('area', _name, _columns, _options) end +function osm2pgsql.node_member_ids(relation) + local ids = {} + for _, member in ipairs(relation.members) do + if member.type == 'n' then + ids[#ids + 1] = member.ref + end + end + return ids +end + function osm2pgsql.way_member_ids(relation) local ids = {} for _, member in ipairs(relation.members) do diff --git a/src/middle-pgsql.cpp b/src/middle-pgsql.cpp index 34a2ae9d3..c44b69d0d 100644 --- a/src/middle-pgsql.cpp +++ b/src/middle-pgsql.cpp @@ -691,7 +691,10 @@ INSERT INTO osm2pgsql_changed_relations m_db_connection.exec(build_sql(*m_options, query)); } - load_id_list(m_db_connection, "osm2pgsql_changed_ways", parent_ways); + if (parent_ways) { + load_id_list(m_db_connection, "osm2pgsql_changed_ways", parent_ways); + } + load_id_list(m_db_connection, "osm2pgsql_changed_relations", parent_relations); @@ -700,9 +703,17 @@ INSERT INTO osm2pgsql_changed_relations timer.stop(); log_debug("Found {} new/changed nodes in input.", changed_nodes.size()); - log_debug(" Found in {} their {} parent ways and {} parent relations.", - std::chrono::duration_cast(timer.elapsed()), - parent_ways->size(), parent_relations->size()); + + auto const elapsed_sec = + std::chrono::duration_cast(timer.elapsed()); + + if (parent_ways) { + log_debug(" Found in {} their {} parent ways and {} parent relations.", + elapsed_sec, parent_ways->size(), parent_relations->size()); + } else { + log_debug(" 
Found in {} their {} parent relations.", elapsed_sec, + parent_relations->size()); + } } void middle_pgsql_t::get_way_parents(idlist_t const &changed_ways, @@ -771,6 +782,21 @@ void middle_pgsql_t::way_set(osmium::Way const &way) namespace { +/** + * Build node in buffer from database results. + */ +void build_node(osmid_t id, pg_result_t const &res, int res_num, int offset, + osmium::memory::Buffer *buffer, bool with_attributes) +{ + osmium::builder::NodeBuilder builder{*buffer}; + builder.set_id(id); + + if (with_attributes) { + set_attributes_on_builder(&builder, res, res_num, offset); + } + pgsql_parse_json_tags(res.get_value(res_num, offset + 1), buffer, &builder); +} + /** * Build way in buffer from database results. */ @@ -789,6 +815,24 @@ void build_way(osmid_t id, pg_result_t const &res, int res_num, int offset, } // anonymous namespace +bool middle_query_pgsql_t::node_get(osmid_t id, + osmium::memory::Buffer *buffer) const +{ + assert(buffer); + + auto const res = m_db_connection.exec_prepared("get_node", id); + + if (res.num_tuples() != 1) { + return false; + } + + build_node(id, res, 0, 0, buffer, m_store_options.with_attributes); + + buffer->commit(); + + return true; +} + bool middle_query_pgsql_t::way_get(osmid_t id, osmium::memory::Buffer *buffer) const { diff --git a/src/middle-pgsql.hpp b/src/middle-pgsql.hpp index fc01f2ebc..4c7dd9218 100644 --- a/src/middle-pgsql.hpp +++ b/src/middle-pgsql.hpp @@ -59,6 +59,8 @@ class middle_query_pgsql_t : public middle_query_t size_t nodes_get_list(osmium::WayNodeList *nodes) const override; + bool node_get(osmid_t id, osmium::memory::Buffer *buffer) const override; + bool way_get(osmid_t id, osmium::memory::Buffer *buffer) const override; size_t rel_members_get(osmium::Relation const &rel, diff --git a/src/middle-ram.cpp b/src/middle-ram.cpp index 13c3d96eb..74e1e44fb 100644 --- a/src/middle-ram.cpp +++ b/src/middle-ram.cpp @@ -278,6 +278,16 @@ std::size_t middle_ram_t::nodes_get_list(osmium::WayNodeList 
*nodes) const return count; } +bool middle_ram_t::node_get(osmid_t id, osmium::memory::Buffer *buffer) const +{ + assert(buffer); + + if (m_store_options.nodes) { + return get_object(osmium::item_type::node, id, buffer); + } + return false; +} + bool middle_ram_t::way_get(osmid_t id, osmium::memory::Buffer *buffer) const { assert(buffer); diff --git a/src/middle-ram.hpp b/src/middle-ram.hpp index 935491012..6fe5924a2 100644 --- a/src/middle-ram.hpp +++ b/src/middle-ram.hpp @@ -68,6 +68,8 @@ class middle_ram_t : public middle_t, public middle_query_t std::size_t nodes_get_list(osmium::WayNodeList *nodes) const override; + bool node_get(osmid_t id, osmium::memory::Buffer *buffer) const override; + bool way_get(osmid_t id, osmium::memory::Buffer *buffer) const override; size_t rel_members_get(osmium::Relation const &rel, diff --git a/src/middle.hpp b/src/middle.hpp index 4b3499735..82a71fe92 100644 --- a/src/middle.hpp +++ b/src/middle.hpp @@ -50,11 +50,22 @@ struct middle_query_t : std::enable_shared_from_this */ virtual size_t nodes_get_list(osmium::WayNodeList *nodes) const = 0; + /** + * Retrieves a single node from the nodes storage + * and stores it in the given osmium buffer. + * + * \param id id of the node to retrieve + * \param buffer osmium buffer where to put the node + * + * \return true if the node was retrieved + */ + virtual bool node_get(osmid_t id, osmium::memory::Buffer *buffer) const = 0; + /** * Retrieves a single way from the ways storage * and stores it in the given osmium buffer. * - * \param id id of the way to retrive + * \param id id of the way to retrieve * \param buffer osmium buffer where to put the way * * The function does not retrieve the node locations. @@ -78,10 +89,10 @@ struct middle_query_t : std::enable_shared_from_this osmium::osm_entity_bits::type types) const = 0; /** - * Retrives a single relation from the relation storage + * Retrieves a single relation from the relation storage * and stores it in the given osmium buffer. 
* - * \param id id of the relation to retrive + * \param id id of the relation to retrieve * \param buffer osmium buffer where to put the relation * * \return true if the relation was retrieved diff --git a/src/osmdata.cpp b/src/osmdata.cpp index 06aaf4bdf..3ced9ed31 100644 --- a/src/osmdata.cpp +++ b/src/osmdata.cpp @@ -413,13 +413,15 @@ void osmdata_t::process_dependents() } // stage 1c processing: mark parent relations of marked objects as changed + auto const &marked_nodes = m_output->get_marked_node_ids(); auto const &marked_ways = m_output->get_marked_way_ids(); - if (marked_ways.empty()) { + if (marked_nodes.empty() && marked_ways.empty()) { return; } - // process parent relations of marked ways + // process parent relations of marked nodes and ways idlist_t rels_pending_tracker{}; + m_mid->get_node_parents(marked_nodes, nullptr, &rels_pending_tracker); m_mid->get_way_parents(marked_ways, &rels_pending_tracker); if (rels_pending_tracker.empty()) { diff --git a/src/output-flex.cpp b/src/output-flex.cpp index e059d73a3..003a59718 100644 --- a/src/output-flex.cpp +++ b/src/output-flex.cpp @@ -899,6 +899,54 @@ void output_flex_t::pending_way(osmid_t id) get_mutex_and_call_lua_function(m_process_way, m_way_cache.get()); } +/** + * Expects a Lua (hash) table on the stack, reads the field with name of the + * 'type' parameter which must be either nil or a Lua (array) table, in which + * case all (integer) ids in that table are read into the 'ids' out + * parameter. 
+ */ +void get_object_ids(lua_State *lua_state, char const *const type, idlist_t *ids) +{ + lua_getfield(lua_state, -1, type); + int const ltype = lua_type(lua_state, -1); + + if (ltype == LUA_TNIL) { + lua_pop(lua_state, 1); + return; + } + + if (ltype != LUA_TTABLE) { + lua_pop(lua_state, 1); + throw fmt_error( + "Table returned from select_relation_members() contains '{}' " + "field, but it isn't an array table.", + type); + } + + if (!luaX_is_array(lua_state)) { + lua_pop(lua_state, 1); + throw fmt_error( + "Table returned from select_relation_members() contains '{}' " + "field, but it isn't an array table.", + type); + } + + luaX_for_each(lua_state, [&]() { + osmid_t const id = lua_tointeger(lua_state, -1); + if (id == 0) { + throw fmt_error( + "Table returned from select_relation_members() contains " + "'{}' field, which must contain an array of non-zero " + "integer node ids.", + type); + } + + ids->push_back(id); + }); + + lua_pop(lua_state, 1); +} + void output_flex_t::select_relation_members() { if (!m_select_relation_members) { @@ -921,43 +969,13 @@ void output_flex_t::select_relation_members() "other than nil or a table."}; } - // We have established that we have a table. Get the 'ways' field... - lua_getfield(lua_state(), -1, "ways"); - int const ltype = lua_type(lua_state(), -1); + // We have established that we have a table... - // No 'ways' field, that is okay, nothing to be marked. - if (ltype == LUA_TNIL) { - lua_pop(lua_state(), 2); // return value (a table), ways field (nil) - return; - } + // Get the 'nodes' and 'ways' fields... + get_object_ids(lua_state(), "nodes", m_stage2_node_ids.get()); + get_object_ids(lua_state(), "ways", m_stage2_way_ids.get()); - if (ltype != LUA_TTABLE) { - throw std::runtime_error{ - "Table returned from select_relation_members() contains 'ways' " - "field, but it isn't an array table."}; - } - - // Iterate over the 'ways' table to get all ids... 
- if (!luaX_is_array(lua_state())) { - throw std::runtime_error{ - "Table returned from select_relation_members() contains 'ways' " - "field, but it isn't an array table."}; - } - - luaX_for_each( - lua_state(), [&]() { - osmid_t const id = lua_tointeger(lua_state(), -1); - if (id == 0) { - throw std::runtime_error{ - "Table returned from select_relation_members() contains " - "'ways' field, which must contain an array of non-zero " - "integer way ids."}; - } - - m_stage2_way_ids->push_back(id); - }); - - lua_pop(lua_state(), 2); // return value (a table), ways field (a table) + lua_pop(lua_state(), 1); // return value (a table) } void output_flex_t::select_relation_members(osmid_t id) @@ -1261,8 +1279,9 @@ output_flex_t::output_flex_t(std::shared_ptr const &mid, // If the osm2pgsql.select_relation_members() Lua function is defined // it means we need two-stage processing which in turn means we need - // the full ways stored in the middle. + // the full nodes and ways stored in the middle. 
if (m_select_relation_members) { + access_requirements().full_nodes = true; access_requirements().full_ways = true; } @@ -1482,10 +1501,23 @@ void output_flex_t::init_lua(std::string const &filename, lua_remove(lua_state(), 1); // global "osm2pgsql" } +idlist_t const &output_flex_t::get_marked_node_ids() +{ + if (m_stage2_node_ids->empty()) { + log_info("Skipping stage 1c for nodes (no marked nodes)."); + } else { + log_info("Entering stage 1c processing of {} nodes...", + m_stage2_node_ids->size()); + m_stage2_node_ids->sort_unique(); + } + + return *m_stage2_node_ids; +} + idlist_t const &output_flex_t::get_marked_way_ids() { if (m_stage2_way_ids->empty()) { - log_info("Skipping stage 1c (no marked ways)."); + log_info("Skipping stage 1c for ways (no marked ways)."); } else { log_info("Entering stage 1c processing of {} ways...", m_stage2_way_ids->size()); @@ -1497,12 +1529,12 @@ idlist_t const &output_flex_t::get_marked_way_ids() void output_flex_t::reprocess_marked() { - if (m_stage2_way_ids->empty()) { - log_info("No marked ways (Skipping stage 2)."); + if (m_stage2_node_ids->empty() && m_stage2_way_ids->empty()) { + log_info("No marked nodes or ways (Skipping stage 2)."); return; } - log_info("Reprocess marked ways (stage 2)..."); + log_info("Reprocess marked nodes/ways (stage 2)..."); if (!get_options()->append) { util::timer_t timer; @@ -1528,8 +1560,30 @@ void output_flex_t::reprocess_marked() lua_setfield(lua_state(), -2, "stage"); lua_pop(lua_state(), 1); // osm2pgsql + m_stage2_node_ids->sort_unique(); m_stage2_way_ids->sort_unique(); + log_info("There are {} nodes to reprocess...", m_stage2_node_ids->size()); + { + osmium::memory::Buffer node_buffer{ + 1024, osmium::memory::Buffer::auto_grow::yes}; + + for (osmid_t const id : *m_stage2_node_ids) { + if (middle().node_get(id, &node_buffer)) { + node_delete(id); + if (m_process_node) { + auto const &node = node_buffer.get(0); + m_context_node = &node; + get_mutex_and_call_lua_function(m_process_node, 
node); + } + } + node_buffer.clear(); + } + } + + // We don't need these any more so can free the memory. + m_stage2_node_ids->clear(); + log_info("There are {} ways to reprocess...", m_stage2_way_ids->size()); for (osmid_t const id : *m_stage2_way_ids) { diff --git a/src/output-flex.hpp b/src/output-flex.hpp index 9a4ad92ff..f5191d0ce 100644 --- a/src/output-flex.hpp +++ b/src/output-flex.hpp @@ -127,7 +127,9 @@ class output_flex_t : public output_t void wait() override; + idlist_t const &get_marked_node_ids() override; idlist_t const &get_marked_way_ids() override; + void reprocess_marked() override; void pending_way(osmid_t id) override; @@ -280,8 +282,9 @@ class output_flex_t : public output_t /// The connection to the database server. pg_conn_t m_db_connection; - // This is shared between all clones of the output and must only be + // These are shared between all clones of the output and must only be // accessed while protected using the lua_mutex. + std::shared_ptr m_stage2_node_ids = std::make_shared(); std::shared_ptr m_stage2_way_ids = std::make_shared(); std::shared_ptr m_copy_thread; diff --git a/src/output.hpp b/src/output.hpp index ac807e425..f32107d95 100644 --- a/src/output.hpp +++ b/src/output.hpp @@ -70,6 +70,12 @@ class output_t virtual void wait() {} + virtual idlist_t const &get_marked_node_ids() + { + static idlist_t const ids{}; + return ids; + } + virtual idlist_t const &get_marked_way_ids() { static idlist_t const ids{};