From baab89177dfe77df60b4edc34db2c4d3fc1dc404 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 20 Jul 2024 18:11:25 +0800 Subject: [PATCH 01/13] produce sources JAR greatly improves debugging dependent artifacts --- build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/build.gradle b/build.gradle index d93f0e90b..9379ebf6c 100644 --- a/build.gradle +++ b/build.gradle @@ -14,6 +14,7 @@ java { toolchain { languageVersion.set(JavaLanguageVersion.of(21)) } + withSourcesJar() } jar { From eac127268921e69832d54d0d2c2aae47d4a7732f Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 20 Jul 2024 18:13:01 +0800 Subject: [PATCH 02/13] update some comments --- .../com/conveyal/r5/analyst/NetworkPreloader.java | 4 +++- .../conveyal/r5/analyst/cluster/ScenarioCache.java | 12 ++++++------ .../java/com/conveyal/r5/streets/StreetRouter.java | 2 +- .../conveyal/r5/transit/TransportNetworkCache.java | 1 + .../java/com/conveyal/r5/transit/TripPattern.java | 1 + 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java b/src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java index 943ae7a8e..c3ee89aef 100644 --- a/src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java +++ b/src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java @@ -110,11 +110,13 @@ protected TransportNetwork buildValue(Key key) { // Get the set of points to which we are measuring travel time. Any smaller sub-grids created here will // reference the scenarioNetwork's built-in full-extent pointset, so can reuse its linkage. - // TODO handle multiple destination grids. + // FIXME handle multiple destination grids. if (key.destinationGridExtents == null) { // Special (and ideally temporary) case for regional freeform destinations, where there is no grid to link. // The null destinationGridExtents are created by the WebMercatorExtents#forPointsets else clause. + // FIXME there is no grid to link, but there are points and egress tables to make! + // see com.conveyal.r5.analyst.cluster.AnalysisWorkerTask.loadAndValidateDestinationPointSets return scenarioNetwork; } diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java b/src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java index d9e6789a8..a8fa1a61e 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java @@ -26,14 +26,14 @@ * scenarios from the backend instead of from S3. * * TODO merge this with TransportNetworkCache#resolveScenario into a single multi-level mem/disk/s3 cache. - * Note that this cache is going to just grow indefinitely in size as a worker receives many iterations of the same - * scenario - that could be a memory leak. Again multi level caching could releive those worries. - * It's debatable whether we should be hanging on to scenarios passed with single point requests becuase they may never - * be used again. + * This cache grows in size without bound as a worker receives many iterations of the same scenario. + * This is technically a sort of memory leak for long-lived workers. Multi-level caching could relieve those worries. + * However, this cache stores only the Scenarios and Modifications, not any large egress tables or linkages. + * + * It's debatable whether we should be hanging on to scenarios passed with single point requests, + * because they may never be used again. * Should we just always require a single point task to be sent to the cluster before a regional? * That would not ensure the scenario was present on all workers though. - * - * Created by abyrd on 2018-10-29 */ public class ScenarioCache { diff --git a/src/main/java/com/conveyal/r5/streets/StreetRouter.java b/src/main/java/com/conveyal/r5/streets/StreetRouter.java index fa3750e35..2fa4b24e5 100644 --- a/src/main/java/com/conveyal/r5/streets/StreetRouter.java +++ b/src/main/java/com/conveyal/r5/streets/StreetRouter.java @@ -742,7 +742,7 @@ public int getTravelTimeToVertex (int vertexIndex) { * fragments from the vertices at either end of the edge up to the destination split point. * If no states can be produced return null. * - * Note that this is only used by the point to point street router, not by LinkedPointSets (which have equivalent + * NOTE that this is ONLY USED BY the point to point street router, NOT BY LinkedPointSets (which have equivalent * logic in their eval method). The PointSet implementation only needs to produce times, not States. But ideally * some common logic can be factored out. */ diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index b7f1f1d8f..8a5a4160d 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -185,6 +185,7 @@ private TransportNetworkConfig loadNetworkConfig (String networkId) { // The switch to use JSON manifests instead of zips occurred in 32a1aebe in July 2016. // Over six years have passed, buildNetworkFromBundleZip is deprecated and could probably be removed. LOG.warn("No network config (aka manifest) found. Assuming old-format network inputs bundle stored as a single ZIP file."); + // FIXME Bundle ZIP building to reduce duplicate code. network = buildNetworkFromBundleZip(networkId); } else { network = buildNetworkFromConfig(networkConfig); diff --git a/src/main/java/com/conveyal/r5/transit/TripPattern.java b/src/main/java/com/conveyal/r5/transit/TripPattern.java index 7c7e08224..522fa8907 100644 --- a/src/main/java/com/conveyal/r5/transit/TripPattern.java +++ b/src/main/java/com/conveyal/r5/transit/TripPattern.java @@ -34,6 +34,7 @@ public class TripPattern implements Serializable, Cloneable { * ID in this transport network the ID would depend on the order of application of scenarios, and because this ID is * used to map results back to the original network. * TODO This concept of an "original" transport network may be obsolete, this field doesn't seem to be used anywhere. + * These are set to sequential integers: the index of the pattern in the TransitLayer's list of patterns. */ public int originalId; From 704c8ce9fbe0222591c438d9927f3fbcae4a9011 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 20 Jul 2024 18:15:36 +0800 Subject: [PATCH 03/13] Unified size-limited scenario network cache Moved from individual TransportNetwork instances to a single cache under TransportNetworkCache. --- .../r5/analyst/cluster/ScenarioCache.java | 2 +- .../r5/analyst/cluster/WorkerStatus.java | 2 - .../conveyal/r5/transit/TransportNetwork.java | 10 +- .../r5/transit/TransportNetworkCache.java | 113 ++++++++---------- 4 files changed, 57 insertions(+), 70 deletions(-) diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java b/src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java index a8fa1a61e..d253e4b6b 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java @@ -44,7 +44,7 @@ public class ScenarioCache { public synchronized void storeScenario (Scenario scenario) { Scenario existingScenario = scenariosById.put(scenario.id, scenario); if (existingScenario != null) { - LOG.debug("Scenario cache already contained a this scenario."); + LOG.debug("Scenario cache already contained this scenario."); } } diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/WorkerStatus.java b/src/main/java/com/conveyal/r5/analyst/cluster/WorkerStatus.java index 1fcd17a6e..9b6d54632 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/WorkerStatus.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/WorkerStatus.java @@ -39,7 +39,6 @@ public class WorkerStatus { public String workerVersion; public String workerId; public Set networks = new HashSet<>(); - public Set scenarios = new HashSet<>(); public double secondsSinceLastPoll; public Map tasksPerMinuteByJobId; @JsonUnwrapped(prefix = "ec2") @@ -86,7 +85,6 @@ public WorkerStatus (AnalysisWorker worker) { // networks = worker.networkPreloader.transportNetworkCache.getLoadedNetworkIds(); // For now we report a single network, even before it's loaded. networks = Sets.newHashSet(worker.networkId); - scenarios = worker.networkPreloader.transportNetworkCache.getAppliedScenarios(); ec2 = worker.ec2info; OperatingSystemMXBean operatingSystemMXBean = ManagementFactory.getOperatingSystemMXBean(); diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java index 3e3cb3720..bbf4f8f70 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java @@ -53,14 +53,10 @@ public class TransportNetwork implements Serializable { public TransitLayer transitLayer; /** - * This stores any number of lightweight scenario networks built upon the current base network. - * FIXME that sounds like a memory leak, should be a WeighingCache or at least size-limited. - * A single network cache at the top level could store base networks and scenarios since they all have globally - * unique IDs. A hierarchical cache does have the advantage of evicting all the scenarios with the associated - * base network, which keeps the references in the scenarios from holding on to the base network. But considering - * that we have never started evicting networks (other than for a "cache" of one element) this might be getting - * ahead of ourselves. + * This field is no longer used. It has been moved to TransportNetworkCache, but this one remains for now, to + * avoid any inadvertent incompatibilities with serialized network files or serialization library settings. */ + @Deprecated public transient Map scenarios = new HashMap<>(); /** diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index 8a5a4160d..8b79e7417 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -29,11 +29,8 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; @@ -53,10 +50,19 @@ public class TransportNetworkCache implements Component { private static final Logger LOG = LoggerFactory.getLogger(TransportNetworkCache.class); /** Cache size is currently limited to one, i.e. the worker holds on to only one network at a time. */ - private static final int DEFAULT_CACHE_SIZE = 1; + private static final int MAX_CACHED_NETWORKS = 1; + + /** + * It might seem sufficient to hold only two scenarios (for single point scenario comparison). But in certain cases + * (e.g. the regional task queue is bigger than the size of each queued regional job) we might end up working on + * a mix of tasks from N different scenarios. Note also that scenarios hold references to their base networks, so + * caching multiple scenario networks can theoretically keep just as many TransportNetworks in memory. + * But in practice, all cloud workers currently remain on a single for their entire lifespan. + */ + public static final int MAX_CACHED_SCENARIO_NETWORKS = 10; // TODO change all other caches from Guava to Caffeine caches. This one is already a Caffeine cache. - private final LoadingCache cache; + private final LoadingCache networkCache; private final FileStorage fileStorage; private final GTFSCache gtfsCache; @@ -64,15 +70,39 @@ public class TransportNetworkCache implements Component { /** * A table of already seen scenarios, avoiding downloading them repeatedly from S3 and allowing us to replace - * scenarios with only their IDs, and reverse that replacement later. + * scenarios with only their IDs, and reverse that replacement later. Note that this caches the Scenario objects + * themselves, not the TransportNetworks built from those Scenarios. */ private final ScenarioCache scenarioCache = new ScenarioCache(); + /** + * This record type is used for the private, encapsulated cache of TransportNetworks for different scenarios. + * Scenario IDs are unique so we could look up these networks by scenario ID alone. However the cache values need + * to be derived entirely from the cache keys. We need some way to look up the base network so we include its ID. + */ + private record BaseAndScenarioId (String baseNetworkId, String scenarioId) { } + + /** + * This stores a number of lightweight scenario networks built upon the current base network. + * Each scenario TransportNetwork has its own LinkageCache, containing LinkedPointSets that each have their own + * EgressCostTable. In practice this can exhaust memory, e.g. after using bicycle egress for about 50 scenarios. + * The previous hierarchical arrangement of caches has the advantage of evicting all the scenarios with the + * associated base network, which keeps the references in the scenarios from holding on to the base network. + * But considering that we have never started evicting networks (other than for a "cache" of one element) this + * eviction can be handled in other ways. + */ + private LoadingCache scenarioNetworkCache; + /** Create a transport network cache. If source bucket is null, will work offline. */ public TransportNetworkCache (FileStorage fileStorage, GTFSCache gtfsCache, OSMCache osmCache) { this.osmCache = osmCache; this.gtfsCache = gtfsCache; - this.cache = createCache(DEFAULT_CACHE_SIZE); + this.networkCache = Caffeine.newBuilder() + .maximumSize(MAX_CACHED_NETWORKS) + .build(this::loadNetwork); + this.scenarioNetworkCache = Caffeine.newBuilder() + .maximumSize(MAX_CACHED_SCENARIO_NETWORKS) + .build(this::loadScenario); this.fileStorage = fileStorage; } @@ -83,7 +113,7 @@ public TransportNetworkCache (FileStorage fileStorage, GTFSCache gtfsCache, OSMC public synchronized @Nonnull TransportNetwork getNetwork (String networkId) throws TransportNetworkException { try { - return cache.get(networkId); + return networkCache.get(networkId); } catch (Exception e) { throw new TransportNetworkException("Could not load TransportNetwork into cache. ", e); } @@ -119,31 +149,22 @@ public void rememberScenario (Scenario scenario) { * tables is already parallelized. */ public synchronized TransportNetwork getNetworkForScenario (String networkId, String scenarioId) { - // If the networkId is different than previous calls, a new network will be loaded. Its transient nested map - // of scenarios will be empty at first. This ensures it's initialized if null. - // FIXME apparently this can't happen - the field is transient and initialized in TransportNetwork. - TransportNetwork baseNetwork = this.getNetwork(networkId); - if (baseNetwork.scenarios == null) { - baseNetwork.scenarios = new HashMap<>(); - } + TransportNetwork scenarioNetwork = scenarioNetworkCache.get(new BaseAndScenarioId(networkId, scenarioId)); + return scenarioNetwork; + } - TransportNetwork scenarioNetwork = baseNetwork.scenarios.get(scenarioId); - if (scenarioNetwork == null) { - // The network for this scenario was not found in the cache. Create that scenario network and cache it. - LOG.debug("Applying scenario to base network..."); - // Fetch the full scenario if an ID was specified. - Scenario scenario = resolveScenario(networkId, scenarioId); - // Apply any scenario modifications to the network before use, performing protective copies where necessary. - // We used to prepend a filter to the scenario, removing trips that are not running during the search time window. - // However, because we are caching transportNetworks with scenarios already applied to them, we can’t use - // the InactiveTripsFilter. The solution may be to cache linked point sets based on scenario ID but always - // apply scenarios every time. - scenarioNetwork = scenario.applyToTransportNetwork(baseNetwork); - LOG.debug("Done applying scenario. Caching the resulting network."); - baseNetwork.scenarios.put(scenario.id, scenarioNetwork); - } else { - LOG.debug("Reusing cached TransportNetwork for scenario {}.", scenarioId); - } + private TransportNetwork loadScenario (BaseAndScenarioId ids) { + TransportNetwork baseNetwork = this.getNetwork(ids.baseNetworkId()); + LOG.debug("Scenario TransportNetwork not found. Applying scenario to base network and caching it."); + // Fetch the full scenario if an ID was specified. + Scenario scenario = resolveScenario(ids.baseNetworkId(), ids.scenarioId()); + // Apply any scenario modifications to the network before use, performing protective copies where necessary. + // We used to prepend a filter to the scenario, removing trips that are not running during the search time window. + // However, because we are caching transportNetworks with scenarios already applied to them, we can’t use + // the InactiveTripsFilter. The solution may be to cache linked point sets based on scenario ID but always + // apply scenarios every time. + TransportNetwork scenarioNetwork = scenario.applyToTransportNetwork(baseNetwork); + LOG.debug("Done applying scenario. Caching the resulting network."); return scenarioNetwork; } @@ -356,12 +377,6 @@ private String getNetworkConfigFilename (String networkId) { return GTFSCache.cleanId(networkId) + ".json"; } - private LoadingCache createCache(int size) { - return Caffeine.newBuilder() - .maximumSize(size) - .build(this::loadNetwork); - } - /** * CacheLoader method, which should only be called by the LoadingCache. * Return the graph for the given unique identifier. Load pre-built serialized networks from local or remote @@ -393,28 +408,6 @@ private LoadingCache createCache(int size) { } } - /** - * This will eventually be used in WorkerStatus to report to the backend all loaded networks, to give it hints about - * what kind of tasks the worker is ready to work on immediately. This is made more complicated by the fact that - * workers are started up with no networks loaded, but with the intent for them to work on a particular job. So - * currently the workers just report which network they were started up for, and this method is not used. - * - * In the future, workers should just report an empty set of loaded networks, and the back end should strategically - * send them tasks when they come on line to assign them to networks as needed. But this will require a new - * mechanism to fairly allocate the workers to jobs. - */ - public Set getLoadedNetworkIds() { - return cache.asMap().keySet(); - } - - public Set getAppliedScenarios() { - return cache.asMap().values().stream() - .filter(network -> network.scenarios != null) - .map(network -> network.scenarios.keySet()) - .flatMap(Collection::stream) - .collect(Collectors.toSet()); - } - /** * Given a network and scenario ID, retrieve that scenario from the local disk cache (falling back on S3). */ From 1862bef580d624e9826bf35c638e853e4375d6ac Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 20 Jul 2024 19:34:16 +0800 Subject: [PATCH 04/13] remove explicit synchronization, rely on caches --- .../r5/transit/TransportNetworkCache.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index 8b79e7417..c2bfa290f 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -110,8 +110,7 @@ public TransportNetworkCache (FileStorage fileStorage, GTFSCache gtfsCache, OSMC * Find a transport network by ID, building or loading as needed from pre-existing OSM, GTFS, MapDB, or Kryo files. * This should never return null. If a TransportNetwork can't be built or loaded, an exception will be thrown. */ - public synchronized @Nonnull - TransportNetwork getNetwork (String networkId) throws TransportNetworkException { + public TransportNetwork getNetwork (String networkId) throws TransportNetworkException { try { return networkCache.get(networkId); } catch (Exception e) { @@ -137,18 +136,19 @@ public void rememberScenario (Scenario scenario) { * base graphs). Therefore we can look up cached scenario networks based solely on their scenarioId rather than a * compound key of (networkId, scenarioId). * - * The fact that scenario networks are cached means that PointSet linkages will be automatically reused. + * Reusing scenario networks automatically leads to reuse of the associated PointSet linkages and egress tables. * TODO it seems to me that this method should just take a Scenario as its second parameter, and that resolving * the scenario against caches on S3 or local disk should be pulled out into a separate function. * The problem is that then you resolve the scenario every time, even when the ID is enough to look up the already * built network. So we need to pass the whole task in here, so either the ID or full scenario are visible. * - * Thread safety notes: This entire method is synchronized so access by multiple threads will be sequential. - * The first thread will have a chance to build and store the requested scenario before any others see it. - * This means each new scenario will be applied one after the other. This is probably OK as long as building egress - * tables is already parallelized. + * Thread safety: getNetwork and getNetworkForScenario are threadsafe caches, so access to the same key by multiple + * threads will occur sequentially without repeatedly or simultaneously performing the same loading actions. + * Javadoc on the Caffeine LoadingCache indicates that it will throw exceptions when the cache loader method throws + * them, without establishing a mapping in the cache. So exceptions occurring during scenario application are + * expected to bubble up unimpeded. */ - public synchronized TransportNetwork getNetworkForScenario (String networkId, String scenarioId) { + public TransportNetwork getNetworkForScenario (String networkId, String scenarioId) { TransportNetwork scenarioNetwork = scenarioNetworkCache.get(new BaseAndScenarioId(networkId, scenarioId)); return scenarioNetwork; } From 9d36b010d91549a0e4df14126eaf25f277f9a0b6 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Sat, 20 Jul 2024 22:24:26 +0800 Subject: [PATCH 05/13] fix javadoc --- .../java/com/conveyal/r5/transit/TransportNetworkCache.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index c2bfa290f..1c512640b 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -57,7 +57,8 @@ public class TransportNetworkCache implements Component { * (e.g. the regional task queue is bigger than the size of each queued regional job) we might end up working on * a mix of tasks from N different scenarios. Note also that scenarios hold references to their base networks, so * caching multiple scenario networks can theoretically keep just as many TransportNetworks in memory. - * But in practice, all cloud workers currently remain on a single for their entire lifespan. + * But in practice, in non-local (cloud) operation a given worker instance is locked to a single network for its + * entire lifespan. */ public static final int MAX_CACHED_SCENARIO_NETWORKS = 10; From c417eee00fbff4f73e81144c8a8a9328e5f5301c Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Mon, 30 Sep 2024 18:33:28 +0200 Subject: [PATCH 06/13] store and retrieve user-specified network config --- .../controllers/BundleController.java | 47 +++++++++++++++---- .../com/conveyal/analysis/models/Bundle.java | 6 +++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index b7fc71cc5..dc3034583 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -13,7 +13,6 @@ import com.conveyal.file.FileUtils; import com.conveyal.gtfs.GTFSCache; import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.error.GTFSError; import com.conveyal.gtfs.error.GeneralError; import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.validator.PostLoadValidator; @@ -81,6 +80,7 @@ public BundleController (BackendComponents components) { public void registerEndpoints (Service sparkService) { sparkService.path("/api/bundle", () -> { sparkService.get("", this::getBundles, toJson); + sparkService.get("/:_id/config", this::getBundleConfig, toJson); sparkService.get("/:_id", this::getBundle, toJson); sparkService.post("", this::create, toJson); sparkService.put("/:_id", this::update, toJson); @@ -110,7 +110,6 @@ private Bundle create (Request req, Response res) { try { bundle.name = files.get("bundleName").get(0).getString("UTF-8"); bundle.regionId = files.get("regionId").get(0).getString("UTF-8"); - if (files.get("osmId") != null) { bundle.osmId = files.get("osmId").get(0).getString("UTF-8"); Bundle bundleWithOsm = Persistence.bundles.find(QueryBuilder.start("osmId").is(bundle.osmId).get()).next(); @@ -118,7 +117,6 @@ private Bundle create (Request req, Response res) { throw AnalysisServerException.badRequest("Selected OSM does not exist."); } } - if (files.get("feedGroupId") != null) { bundle.feedGroupId = files.get("feedGroupId").get(0).getString("UTF-8"); Bundle bundleWithFeed = Persistence.bundles.find(QueryBuilder.start("feedGroupId").is(bundle.feedGroupId).get()).next(); @@ -135,6 +133,12 @@ private Bundle create (Request req, Response res) { bundle.feedsComplete = bundleWithFeed.feedsComplete; bundle.totalFeeds = bundleWithFeed.totalFeeds; } + if (files.get("config") != null) { + // For validation, rather than reading as freeform JSON, deserialize into a model class instance. + // However, only the instance fields specifying things other than OSM and GTFS IDs will be retained. + String configString = files.get("config").get(0).getString(); + bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class); + } UserPermissions userPermissions = UserPermissions.from(req); bundle.accessGroup = userPermissions.accessGroup; bundle.createdBy = userPermissions.email; @@ -274,15 +278,19 @@ private Bundle create (Request req, Response res) { return bundle; } + /** SIDE EFFECTS: This method will change the field bundle.config before writing it. */ private void writeNetworkConfigToCache (Bundle bundle) throws IOException { - TransportNetworkConfig networkConfig = new TransportNetworkConfig(); - networkConfig.osmId = bundle.osmId; - networkConfig.gtfsIds = bundle.feeds.stream().map(f -> f.bundleScopedFeedId).collect(Collectors.toList()); - + // If the user specified additional network configuration options, they should already be in bundle.config. + // If no custom options were specified, we start with a fresh, empty instance. + if (bundle.config == null) { + bundle.config = new TransportNetworkConfig(); + } + // This will overwrite and override any inconsistent osm and gtfs IDs that were mistakenly supplied by the user. + bundle.config.osmId = bundle.osmId; + bundle.config.gtfsIds = bundle.feeds.stream().map(f -> f.bundleScopedFeedId).collect(Collectors.toList()); String configFileName = bundle._id + ".json"; File configFile = FileUtils.createScratchFile("json"); - JsonUtil.objectMapper.writeValue(configFile, networkConfig); - + JsonUtil.objectMapper.writeValue(configFile, bundle.config); FileStorageKey key = new FileStorageKey(BUNDLES, configFileName); fileStorage.moveIntoStorage(key, configFile); } @@ -312,6 +320,27 @@ private Bundle getBundle (Request req, Response res) { return bundle; } + /** + * There are two copies of the Bundle/Network config: one in the Bundle entry in the database and one in a JSON + * file (obtainable by the workers). This method always reads the one in the file, which has been around longer + * and is considered the definitive source of truth. The entry in the database is a newer addition and has only + * been around since September 2024. + */ + private TransportNetworkConfig getBundleConfig (Request request, Response res) { + // Unfortunately this mimics logic in TransportNetworkCache. Deduplicate in a static utility method? + String id = GTFSCache.cleanId(request.params("_id")); + FileStorageKey key = new FileStorageKey(BUNDLES, id, "json"); + File networkConfigFile = fileStorage.getFile(key); + // Unlike in the worker, we expect the backend to have a model field for every known network/bundle option. + // Threfore, use the default objectMapper that does not tolerate unknown fields. + try { + return JsonUtil.objectMapper.readValue(networkConfigFile, TransportNetworkConfig.class); + } catch (Exception exception) { + LOG.error("Exception deserializing stored network config", exception); + return null; + } + } + private Collection getBundles (Request req, Response res) { return Persistence.bundles.findPermittedForQuery(req); } diff --git a/src/main/java/com/conveyal/analysis/models/Bundle.java b/src/main/java/com/conveyal/analysis/models/Bundle.java index 912f066b4..44cea922b 100644 --- a/src/main/java/com/conveyal/analysis/models/Bundle.java +++ b/src/main/java/com/conveyal/analysis/models/Bundle.java @@ -5,6 +5,7 @@ import com.conveyal.gtfs.error.GTFSError; import com.conveyal.gtfs.model.FeedInfo; import com.conveyal.gtfs.validator.model.Priority; +import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.fasterxml.jackson.annotation.JsonIgnore; import java.time.LocalDate; @@ -47,6 +48,11 @@ public class Bundle extends Model implements Cloneable { public int feedsComplete; public int totalFeeds; + // The definitive TransportNetworkConfig is a JSON file stored alonside the feeds in file storage. It is + // duplicated here to record any additional user-specified options that were supplied when the bundle was created. + // It may contain redundant copies of information stored in the outer level Bundle such as OSM and GTFS feed IDs. + public TransportNetworkConfig config; + public static String bundleScopeFeedId (String feedId, String feedGroupId) { return String.format("%s_%s", feedId, feedGroupId); } From e3094f8a32aa5ef8be8a816ef6fe42bc5ea8513c Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Wed, 2 Oct 2024 12:17:21 -0400 Subject: [PATCH 07/13] strict objectmapper to detect misspelled fields --- .../com/conveyal/analysis/controllers/BundleController.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index dc3034583..4f26d0aed 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -21,6 +21,7 @@ import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.conveyal.r5.analyst.progress.Task; +import com.conveyal.r5.common.JsonUtilities; import com.conveyal.r5.streets.OSMCache; import com.conveyal.r5.util.ExceptionUtils; import com.mongodb.QueryBuilder; @@ -136,8 +137,9 @@ private Bundle create (Request req, Response res) { if (files.get("config") != null) { // For validation, rather than reading as freeform JSON, deserialize into a model class instance. // However, only the instance fields specifying things other than OSM and GTFS IDs will be retained. + // Use strict object mapper (from the strict/lenient pair) to fail on misspelled field names. String configString = files.get("config").get(0).getString(); - bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class); + bundle.config = JsonUtilities.objectMapper.readValue(configString, TransportNetworkConfig.class); } UserPermissions userPermissions = UserPermissions.from(req); bundle.accessGroup = userPermissions.accessGroup; From f5e10ee4a05837dc08fbc29c35cfd26bb4cd10ba Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Wed, 2 Oct 2024 12:30:02 -0400 Subject: [PATCH 08/13] update to latest mongodb legacy java driver This is the newest driver, but with an older programming API that matches our code. It should be a drop-in replacement, and Mongojack also seems to pick this up as its transitive mongodb driver dependency. --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index d93f0e90b..6452a06dc 100644 --- a/build.gradle +++ b/build.gradle @@ -164,7 +164,7 @@ dependencies { } // Database driver. - implementation 'org.mongodb:mongo-java-driver:3.11.0' + implementation 'org.mongodb:mongodb-driver-legacy:5.2.0' // Legacy system for storing Java objects, this functionality is now provided by the MongoDB driver itself. implementation 'org.mongojack:mongojack:2.10.1' From ecc8002021ce86927b19c6a0240a468680513af4 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 3 Oct 2024 12:16:08 -0400 Subject: [PATCH 09/13] Revert "strict objectmapper to detect misspelled fields" This reverts commit 3094f8a32aa5ef8be8a816ef6fe42bc5ea8513c. The backend needs to allow sending config to workers with new features it doesn't know about. --- .../com/conveyal/analysis/controllers/BundleController.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index 4f26d0aed..dc3034583 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -21,7 +21,6 @@ import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.conveyal.r5.analyst.progress.Task; -import com.conveyal.r5.common.JsonUtilities; import com.conveyal.r5.streets.OSMCache; import com.conveyal.r5.util.ExceptionUtils; import com.mongodb.QueryBuilder; @@ -137,9 +136,8 @@ private Bundle create (Request req, Response res) { if (files.get("config") != null) { // For validation, rather than reading as freeform JSON, deserialize into a model class instance. // However, only the instance fields specifying things other than OSM and GTFS IDs will be retained. - // Use strict object mapper (from the strict/lenient pair) to fail on misspelled field names. String configString = files.get("config").get(0).getString(); - bundle.config = JsonUtilities.objectMapper.readValue(configString, TransportNetworkConfig.class); + bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class); } UserPermissions userPermissions = UserPermissions.from(req); bundle.accessGroup = userPermissions.accessGroup; From e4614bb6ac54927158a883daef49060e29ba28dd Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 3 Oct 2024 12:27:29 -0400 Subject: [PATCH 10/13] clarify why unknown field names are tolerated --- .../com/conveyal/analysis/controllers/BundleController.java | 5 +++-- src/main/java/com/conveyal/analysis/models/Bundle.java | 2 +- .../java/com/conveyal/r5/transit/TransportNetworkCache.java | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index dc3034583..decd74a2c 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -134,8 +134,9 @@ private Bundle create (Request req, Response res) { bundle.totalFeeds = bundleWithFeed.totalFeeds; } if (files.get("config") != null) { - // For validation, rather than reading as freeform JSON, deserialize into a model class instance. - // However, only the instance fields specifying things other than OSM and GTFS IDs will be retained. + // Validation by deserializing into a model class instance. Unknown fields are ignored to + // allow sending config to custom or experimental workers with features unknown to the backend. + // The fields specifying OSM and GTFS IDs are not expected here. They will be ignored and overwritten. String configString = files.get("config").get(0).getString(); bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class); } diff --git a/src/main/java/com/conveyal/analysis/models/Bundle.java b/src/main/java/com/conveyal/analysis/models/Bundle.java index 44cea922b..4d5bef3e6 100644 --- a/src/main/java/com/conveyal/analysis/models/Bundle.java +++ b/src/main/java/com/conveyal/analysis/models/Bundle.java @@ -48,7 +48,7 @@ public class Bundle extends Model implements Cloneable { public int feedsComplete; public int totalFeeds; - // The definitive TransportNetworkConfig is a JSON file stored alonside the feeds in file storage. It is + // The definitive TransportNetworkConfig is a JSON file stored alongside the feeds in file storage. It is // duplicated here to record any additional user-specified options that were supplied when the bundle was created. // It may contain redundant copies of information stored in the outer level Bundle such as OSM and GTFS feed IDs. public TransportNetworkConfig config; diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index b7f1f1d8f..6a0c291ab 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -168,6 +168,8 @@ private TransportNetworkConfig loadNetworkConfig (String networkId) { File configFile = fileStorage.getFile(configFileKey); try { // Use lenient mapper to mimic behavior in objectFromRequestBody. + // A single network configuration file might be used across several worker versions. Unknown field names + // may be present for other worker versions unknown to this one. So we can't strictly validate field names. return JsonUtilities.lenientObjectMapper.readValue(configFile, TransportNetworkConfig.class); } catch (IOException e) { throw new RuntimeException("Error reading TransportNetworkConfig. Does it contain new unrecognized fields?", e); From 434dcf930c7d36292b9ea7a070ab48c768c17291 Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 4 Oct 2024 21:47:06 +0800 Subject: [PATCH 11/13] Check if the config exists, return 404 if missing --- .../analysis/controllers/BundleController.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index decd74a2c..a83160ad3 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -18,8 +18,8 @@ import com.conveyal.gtfs.validator.PostLoadValidator; import com.conveyal.osmlib.Node; import com.conveyal.osmlib.OSM; -import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; +import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.progress.Task; import com.conveyal.r5.streets.OSMCache; import com.conveyal.r5.util.ExceptionUtils; @@ -327,13 +327,17 @@ private Bundle getBundle (Request req, Response res) { * and is considered the definitive source of truth. The entry in the database is a newer addition and has only * been around since September 2024. */ - private TransportNetworkConfig getBundleConfig (Request request, Response res) { + private TransportNetworkConfig getBundleConfig(Request request, Response res) throws IOException { // Unfortunately this mimics logic in TransportNetworkCache. Deduplicate in a static utility method? String id = GTFSCache.cleanId(request.params("_id")); FileStorageKey key = new FileStorageKey(BUNDLES, id, "json"); File networkConfigFile = fileStorage.getFile(key); + if (!networkConfigFile.exists()) { + throw AnalysisServerException.notFound("Bundle configuration file could not be found."); + } + // Unlike in the worker, we expect the backend to have a model field for every known network/bundle option. - // Threfore, use the default objectMapper that does not tolerate unknown fields. + // Therefore, use the default objectMapper that does not tolerate unknown fields. try { return JsonUtil.objectMapper.readValue(networkConfigFile, TransportNetworkConfig.class); } catch (Exception exception) { From cd7840c2c75b8123be51b569284bbaf03e0e2bfb Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 4 Oct 2024 21:49:06 +0800 Subject: [PATCH 12/13] Remove unnecessary throws clause --- .../com/conveyal/analysis/controllers/BundleController.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index a83160ad3..b493cf618 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -327,7 +327,7 @@ private Bundle getBundle (Request req, Response res) { * and is considered the definitive source of truth. The entry in the database is a newer addition and has only * been around since September 2024. */ - private TransportNetworkConfig getBundleConfig(Request request, Response res) throws IOException { + private TransportNetworkConfig getBundleConfig(Request request, Response res) { // Unfortunately this mimics logic in TransportNetworkCache. Deduplicate in a static utility method? String id = GTFSCache.cleanId(request.params("_id")); FileStorageKey key = new FileStorageKey(BUNDLES, id, "json"); From 8720aa674f1f6601646f286eab6b054f7e189a0d Mon Sep 17 00:00:00 2001 From: Anson Stewart Date: Fri, 4 Oct 2024 14:00:33 -0400 Subject: [PATCH 13/13] Add custom walking traversal permissions via TransportNetworkConfig (#943) * Add SidewalkTraversalPermissionLabeler activated via TransportNetworkConfig when building a network from a config JSON * Remove deprecated buildNetworkFromBundleZip Building from .zip files on S3 has not been supported since 2016. * Enable TransportNetworkConfig in non-cluster use such as PointToPointRouterServer * Make sidewalk PermissionLabeler more restrictive Disallow walking anywhere driving is allowed * Set permissionLabeler more cleanly --- .../cluster/TransportNetworkConfig.java | 11 +-- .../SidewalkTraversalPermissionLabeler.java | 35 ++++++++++ .../com/conveyal/r5/streets/StreetLayer.java | 24 ++++++- .../conveyal/r5/transit/TransportNetwork.java | 58 +++++++++++---- .../r5/transit/TransportNetworkCache.java | 70 ++----------------- 5 files changed, 113 insertions(+), 85 deletions(-) create mode 100644 src/main/java/com/conveyal/r5/labeling/SidewalkTraversalPermissionLabeler.java diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/TransportNetworkConfig.java b/src/main/java/com/conveyal/r5/analyst/cluster/TransportNetworkConfig.java index 012e3204a..387742634 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/TransportNetworkConfig.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/TransportNetworkConfig.java @@ -2,12 +2,8 @@ import com.conveyal.r5.analyst.fare.InRoutingFareCalculator; import com.conveyal.r5.analyst.scenario.Modification; -import com.conveyal.r5.analyst.scenario.RasterCost; -import com.conveyal.r5.analyst.scenario.ShapefileLts; import com.conveyal.r5.profile.StreetMode; -import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.databind.annotation.JsonSerialize; import java.util.List; import java.util.Set; @@ -54,4 +50,11 @@ public class TransportNetworkConfig { */ public Set buildGridsForModes; + /** + * Specifies which "labeler" to use when setting traversal mode permissions from OSM tags. For now, only + * implemented with "sidewalk" to use the SidewalkTraversalPermissionLayer. This should eventually be cleaned up + * (specifying different labelers, using enums). + */ + public String traversalPermissionLabeler; + } diff --git a/src/main/java/com/conveyal/r5/labeling/SidewalkTraversalPermissionLabeler.java b/src/main/java/com/conveyal/r5/labeling/SidewalkTraversalPermissionLabeler.java new file mode 100644 index 000000000..34d2c8465 --- /dev/null +++ b/src/main/java/com/conveyal/r5/labeling/SidewalkTraversalPermissionLabeler.java @@ -0,0 +1,35 @@ +package com.conveyal.r5.labeling; + +import com.conveyal.osmlib.Way; +import com.conveyal.r5.streets.EdgeStore; + +/** + * Traversal permission labeler that restricts walking on most driving ways (useful for networks with complete + * sidewalks). Also includes permissions for the United States (see USTraversalPermissionLabeler). + */ +public class SidewalkTraversalPermissionLabeler extends TraversalPermissionLabeler { + static { + addPermissions("pedestrian", "bicycle=yes"); + addPermissions("bridleway", "bicycle=yes;foot=yes"); //horse=yes but we don't support horse + addPermissions("cycleway", "bicycle=yes;foot=yes"); + addPermissions("trunk|primary|secondary|tertiary|unclassified|residential|living_street|road|service|track", + "access=yes"); + } + + @Override + public RoadPermission getPermissions(Way way) { + RoadPermission rp = super.getPermissions(way); + if (rp.forward.contains(EdgeStore.EdgeFlag.ALLOWS_CAR) || + rp.forward.contains(EdgeStore.EdgeFlag.NO_THRU_TRAFFIC_CAR) || + rp.backward.contains(EdgeStore.EdgeFlag.ALLOWS_CAR) || + rp.backward.contains(EdgeStore.EdgeFlag.NO_THRU_TRAFFIC_CAR) + ) { + rp.forward.remove(EdgeStore.EdgeFlag.ALLOWS_PEDESTRIAN); + rp.forward.remove(EdgeStore.EdgeFlag.NO_THRU_TRAFFIC_PEDESTRIAN); + rp.backward.remove(EdgeStore.EdgeFlag.ALLOWS_PEDESTRIAN); + rp.backward.remove(EdgeStore.EdgeFlag.NO_THRU_TRAFFIC_PEDESTRIAN); + } + return rp; + } + +} diff --git a/src/main/java/com/conveyal/r5/streets/StreetLayer.java b/src/main/java/com/conveyal/r5/streets/StreetLayer.java index 3d781b41b..4e31481ee 100644 --- a/src/main/java/com/conveyal/r5/streets/StreetLayer.java +++ b/src/main/java/com/conveyal/r5/streets/StreetLayer.java @@ -6,12 +6,14 @@ import com.conveyal.osmlib.OSMEntity; import com.conveyal.osmlib.Relation; import com.conveyal.osmlib.Way; +import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.conveyal.r5.analyst.scenario.PickupWaitTimes; import com.conveyal.r5.api.util.BikeRentalStation; import com.conveyal.r5.api.util.ParkRideParking; import com.conveyal.r5.common.GeometryUtils; import com.conveyal.r5.labeling.LevelOfTrafficStressLabeler; import com.conveyal.r5.labeling.RoadPermission; +import com.conveyal.r5.labeling.SidewalkTraversalPermissionLabeler; import com.conveyal.r5.labeling.SpeedLabeler; import com.conveyal.r5.labeling.StreetClass; import com.conveyal.r5.labeling.TraversalPermissionLabeler; @@ -132,9 +134,9 @@ public class StreetLayer implements Serializable, Cloneable { public TIntObjectMap parkRideLocationsMap; // TODO these are only needed when building the network, should we really be keeping them here in the layer? - // We should instead have a network builder that holds references to this transient state. - // TODO don't hardwire to US - private transient TraversalPermissionLabeler permissionLabeler = new USTraversalPermissionLabeler(); + // We should instead have a network builder that holds references to this transient state. Note initial + // approach of specifying a TraversalPermissionLabeler in TransportNetworkConfig. + private transient TraversalPermissionLabeler permissionLabeler; private transient LevelOfTrafficStressLabeler stressLabeler = new LevelOfTrafficStressLabeler(); private transient TypeOfEdgeLabeler typeOfEdgeLabeler = new TypeOfEdgeLabeler(); private transient SpeedLabeler speedLabeler; @@ -207,6 +209,22 @@ public class StreetLayer implements Serializable, Cloneable { public StreetLayer() { speedLabeler = new SpeedLabeler(SpeedConfig.defaultConfig()); + permissionLabeler = new USTraversalPermissionLabeler(); + } + + public StreetLayer(TransportNetworkConfig config) { + this(); + if (config != null) { + permissionLabeler = switch (config.traversalPermissionLabeler) { + case "sidewalk" -> new SidewalkTraversalPermissionLabeler(); + case null -> new USTraversalPermissionLabeler(); + default -> throw new IllegalArgumentException( + "Unknown traversal permission labeler: " + config.traversalPermissionLabeler + ); + }; + } else { + permissionLabeler = new USTraversalPermissionLabeler(); + } } /** Load street layer from an OSM-lib OSM DB */ diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java index bbf4f8f70..4d351eedf 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetwork.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetwork.java @@ -4,9 +4,11 @@ import com.conveyal.osmlib.OSM; import com.conveyal.r5.analyst.LinkageCache; import com.conveyal.r5.analyst.WebMercatorGridPointSet; +import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.conveyal.r5.analyst.error.TaskError; import com.conveyal.r5.analyst.fare.InRoutingFareCalculator; import com.conveyal.r5.analyst.scenario.Scenario; +import com.conveyal.r5.common.JsonUtilities; import com.conveyal.r5.kryo.KryoNetworkSerializer; import com.conveyal.r5.profile.StreetMode; import com.conveyal.r5.streets.StreetLayer; @@ -77,8 +79,6 @@ public class TransportNetwork implements Serializable { */ public String scenarioId = null; - public static final String BUILDER_CONFIG_FILENAME = "build-config.json"; - public InRoutingFareCalculator fareCalculator; /** Non-fatal warnings encountered when applying the scenario, null on a base network */ @@ -96,7 +96,9 @@ public void rebuildTransientIndexes() { streetLayer.indexStreets(); transitLayer.rebuildTransientIndexes(); } - + public static TransportNetwork fromFiles (String osmSourceFile, List gtfsSourceFiles) { + return fromFiles(osmSourceFile, gtfsSourceFiles, null); + } /** * OSM PBF files are fragments of a single global database with a single namespace. Therefore it is valid to load * more than one PBF file into a single OSM storage object. However they might be from different points in time, so @@ -110,9 +112,11 @@ public void rebuildTransientIndexes() { * NOTE the feedId of the gtfs feeds loaded here will be the ones declared by the feeds or based on their filenames. * This method makes no effort to impose the more unique feed IDs created by the Analysis backend. */ + public static TransportNetwork fromFiles ( String osmSourceFile, - List gtfsSourceFiles + List gtfsSourceFiles, + String configFile ) throws DuplicateFeedException { // Load OSM data into MapDB to pass into network builder. OSM osm = new OSM(osmSourceFile + ".mapdb"); @@ -120,21 +124,37 @@ public static TransportNetwork fromFiles ( osm.readFromFile(osmSourceFile); // Supply feeds with a stream so they do not sit open in memory while other feeds are being processed. Stream feeds = gtfsSourceFiles.stream().map(GTFSFeed::readOnlyTempFileFromGtfs); - return fromInputs(osm, feeds); + if (configFile == null) { + return fromInputs(osm, feeds); + } else { + try { + // Use lenient mapper to mimic behavior in objectFromRequestBody. + TransportNetworkConfig config = JsonUtilities.lenientObjectMapper.readValue(configFile, + TransportNetworkConfig.class); + return fromInputs(osm, feeds, config); + } catch (IOException e) { + throw new RuntimeException("Error reading TransportNetworkConfig. Does it contain new unrecognized fields?", e); + } + } + } + + public static TransportNetwork fromInputs (OSM osm, Stream gtfsFeeds) { + return fromInputs(osm, gtfsFeeds, null); } /** - * This is the core method for building a street and transit network. It takes osm-lib and gtfs-lib objects as - * parameters. It is wrapped in various other methods that create those OSM and GTFS objects from filenames, input - * directories etc. The supplied OSM object must have intersections already detected. - * The GTFS feeds are supplied as a stream so that they can be loaded one by one on demand. + * This is the method for building a street and transit network locally (as opposed to + * TransportNetworkCache#buildNetworkfromConfig, which is used in cluster builds). This method takes osm-lib, + * gtfs-lib, and config objects as parameters. It is wrapped in various other methods that create those OSM and + * GTFS objects from filenames, input directories etc. The supplied OSM object must have intersections already + * detected. The GTFS feeds are supplied as a stream so that they can be loaded one by one on demand. */ - public static TransportNetwork fromInputs (OSM osm, Stream gtfsFeeds) { + public static TransportNetwork fromInputs (OSM osm, Stream gtfsFeeds, TransportNetworkConfig config) { // Create a transport network to hold the street and transit layers TransportNetwork transportNetwork = new TransportNetwork(); // Make street layer from OSM data in MapDB - StreetLayer streetLayer = new StreetLayer(); + StreetLayer streetLayer = new StreetLayer(config); transportNetwork.streetLayer = streetLayer; streetLayer.parentNetwork = transportNetwork; streetLayer.loadFromOsm(osm); @@ -176,14 +196,16 @@ public static TransportNetwork fromInputs (OSM osm, Stream gtfsFeeds) } /** - * Scan a directory detecting all the files that are network inputs, then build a network from those files. + * Scan a directory detecting all the files that are network inputs, then build a network from those files. This + * method is used in the PointToPointRouterServer, not the cluster-based analysis backend. * - * NOTE the feedId of the gtfs feeds laoded here will be the ones declared by the feeds or based on their filenames. + * NOTE the feedId of the gtfs feeds loaded here will be the ones declared by the feeds or based on their filenames. * This method makes no effort to impose the more unique feed IDs created by the Analysis backend. */ public static TransportNetwork fromDirectory (File directory) throws DuplicateFeedException { File osmFile = null; List gtfsFiles = new ArrayList<>(); + File configFile = null; for (File file : directory.listFiles()) { switch (InputFileType.forFile(file)) { case GTFS: @@ -198,6 +220,9 @@ public static TransportNetwork fromDirectory (File directory) throws DuplicateFe LOG.warn("Can only load one OSM file at a time."); } break; + case CONFIG: + LOG.info("Found config file {}", file); + configFile = file; case DEM: LOG.warn("DEM file '{}' not yet supported.", file); break; @@ -209,7 +234,11 @@ public static TransportNetwork fromDirectory (File directory) throws DuplicateFe LOG.error("An OSM PBF file is required to build a network."); return null; } else { - return fromFiles(osmFile.getAbsolutePath(), gtfsFiles); + if (configFile == null) { + return fromFiles(osmFile.getAbsolutePath(), gtfsFiles); + } else { + return fromFiles(osmFile.getAbsolutePath(), gtfsFiles, configFile.getAbsolutePath()); + } } } @@ -255,6 +284,7 @@ public static InputFileType forFile(File file) { if (name.endsWith(".pbf") || name.endsWith(".vex")) return OSM; if (name.endsWith(".tif") || name.endsWith(".tiff")) return DEM; // Digital elevation model (elevation raster) if (name.endsWith("network.dat")) return OUTPUT; + if (name.endsWith(".json")) return CONFIG; return OTHER; } } diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index 98db69a97..c67fe9d24 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -25,14 +25,9 @@ import javax.annotation.Nonnull; import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStream; import java.util.List; import java.util.Set; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; import static com.conveyal.file.FileCategory.BUNDLES; import static com.conveyal.file.FileCategory.DATASOURCES; @@ -207,10 +202,8 @@ private TransportNetworkConfig loadNetworkConfig (String networkId) { TransportNetworkConfig networkConfig = loadNetworkConfig(networkId); if (networkConfig == null) { // The switch to use JSON manifests instead of zips occurred in 32a1aebe in July 2016. - // Over six years have passed, buildNetworkFromBundleZip is deprecated and could probably be removed. - LOG.warn("No network config (aka manifest) found. Assuming old-format network inputs bundle stored as a single ZIP file."); - // FIXME Bundle ZIP building to reduce duplicate code. - network = buildNetworkFromBundleZip(networkId); + // buildNetworkFromBundleZip was deprecated for years then removed in 2024. + throw new RuntimeException("No network config (aka manifest) found."); } else { network = buildNetworkFromConfig(networkConfig); } @@ -243,70 +236,19 @@ private TransportNetworkConfig loadNetworkConfig (String networkId) { return network; } - /** Build a transport network given a network ID, using a zip of all bundle files in S3. */ - @Deprecated - private TransportNetwork buildNetworkFromBundleZip (String networkId) { - // The location of the inputs that will be used to build this graph - File dataDirectory = FileUtils.createScratchDirectory(); - FileStorageKey zipKey = new FileStorageKey(BUNDLES, networkId + ".zip"); - File zipFile = fileStorage.getFile(zipKey); - - try { - ZipInputStream zis = new ZipInputStream(new FileInputStream(zipFile)); - ZipEntry entry; - while ((entry = zis.getNextEntry()) != null) { - File entryDestination = new File(dataDirectory, entry.getName()); - if (!entryDestination.toPath().normalize().startsWith(dataDirectory.toPath())) { - throw new Exception("Bad zip entry"); - } - - // Are both these mkdirs calls necessary? - entryDestination.getParentFile().mkdirs(); - if (entry.isDirectory()) - entryDestination.mkdirs(); - else { - OutputStream entryFileOut = new FileOutputStream(entryDestination); - zis.transferTo(entryFileOut); - entryFileOut.close(); - } - } - zis.close(); - } catch (Exception e) { - // TODO delete cache dir which is probably corrupted. - LOG.warn("Error retrieving transportation network input files", e); - return null; - } - - // Now we have a local copy of these graph inputs. Make a graph out of them. - TransportNetwork network; - try { - network = TransportNetwork.fromDirectory(dataDirectory); - } catch (DuplicateFeedException e) { - LOG.error("Duplicate feeds in transport network {}", networkId, e); - throw new RuntimeException(e); - } - - // Set the ID on the network and its layers to allow caching linkages and analysis results. - network.scenarioId = networkId; - - return network; - } - /** * Build a network from a JSON TransportNetworkConfig in file storage. * This describes the locations of files used to create a bundle, as well as options applied at network build time. * It contains the unique IDs of the GTFS feeds and OSM extract. */ private TransportNetwork buildNetworkFromConfig (TransportNetworkConfig config) { - // FIXME duplicate code. All internal building logic should be encapsulated in a method like - // TransportNetwork.build(osm, gtfs1, gtfs2...) - // We currently have multiple copies of it, in buildNetworkFromConfig and buildNetworkFromBundleZip so you've - // got to remember to do certain things like set the network ID of the network in multiple places in the code. - // Maybe we should just completely deprecate bundle ZIPs and remove those code paths. + // FIXME All internal building logic should be encapsulated in a method like TransportNetwork.build(osm, + // gtfs1, gtfs2...) (see various methods in TransportNetwork). TransportNetwork network = new TransportNetwork(); - network.streetLayer = new StreetLayer(); + network.streetLayer = new StreetLayer(config); + network.streetLayer.loadFromOsm(osmCache.get(config.osmId)); network.streetLayer.parentNetwork = network;