Skip to content
This repository was archived by the owner on May 27, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions RULES.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Rules are declared in the [`ValidationRules` class](https://github.com/CUTR-at-U
| [E050](#E050) | `timestamp` is in the future
| [E051](#E051) | GTFS-rt `stop_sequence` not found in GTFS data
| [E052](#E052) | `vehicle.id` is not unique
| [E053](#E053) | `start_time` for trip has changed

### Table of Warnings

Expand Down Expand Up @@ -717,6 +718,16 @@ From [VehiclePosition.VehicleDescriptor](https://github.com/google/transit/blob/
#### References:
* [`vehicle.id`](https://github.com/google/transit/blob/master/gtfs-realtime/spec/en/reference.md#message-vehicledescriptor)

<a name="E053"/>

### E053 - `start_time` for frequency-based trip has changed

A trip that is defined in frequencies.txt and has exact_times=0 or empty should have the same start_time in the descriptor for it's entire trip instance. A trip instance is defined as one journey of the vehicle from the starting stop_sequence for a trip_id in stop_times.txt to the ending stop_sequence for that same trip_id.

From [TripUpdate.TripDescriptor](https://github.com/google/transit/blob/master/gtfs-realtime/spec/en/reference.md#message-tripdescriptor) for `start_time`:

>If the trip corresponds to exact_times=0, then its start_time may be arbitrary, and is initially expected to be the first departure of the trip. Once established, the start_time of this frequency-based exact_times=0 trip should be considered immutable, even if the first departure time changes

# Warnings

<a name="W001"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ public class ValidationRules {
public static final ValidationRule E052 = new ValidationRule("E052", "ERROR", "vehicle.id is not unique",
"Each vehicle should have a unique ID",
"which is used by more than one vehicle in the feed");

public static final ValidationRule E053 = new ValidationRule("E053", "ERROR", "start_time for frequency-based trip changed",
"An exact_times = 0 frequencies.txt trip should have the same start_time through it's trip",
"exact_times=0 must be immutable");

private static List<ValidationRule> mAllRules = new ArrayList<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package edu.usf.cutr.gtfsrtvalidator.lib.validation.rules;

import com.google.transit.realtime.GtfsRealtime;
import com.google.transit.realtime.GtfsRealtime.TripUpdate;

import edu.usf.cutr.gtfsrtvalidator.lib.model.MessageLogModel;
import edu.usf.cutr.gtfsrtvalidator.lib.model.OccurrenceModel;
import edu.usf.cutr.gtfsrtvalidator.lib.model.helper.ErrorListHelperModel;
Expand All @@ -27,6 +29,7 @@
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import static edu.usf.cutr.gtfsrtvalidator.lib.validation.ValidationRules.*;
Expand All @@ -37,21 +40,26 @@
* E006 - Missing required vehicle_position trip field for frequency-based exact_times = 0
* E013 - Frequency type 0 trip schedule_relationship should be UNSCHEDULED or empty
* W005 - Missing vehicle_id in trip_update for frequency-based exact_times = 0
* E053 - An exact_times = 0 frequencies.txt trip should have the same start_time through it's trip
*/
public class FrequencyTypeZeroValidator implements FeedEntityValidator {

private static final org.slf4j.Logger _log = LoggerFactory.getLogger(FrequencyTypeZeroValidator.class);

private HashMap<String, TripUpdate> previousTripUpdates=new HashMap<String, TripUpdate>();

@Override
public List<ErrorListHelperModel> validate(long currentTimeMillis, GtfsMutableDao gtfsData, GtfsMetadata gtfsMetadata, GtfsRealtime.FeedMessage feedMessage, GtfsRealtime.FeedMessage previousFeedMessage, GtfsRealtime.FeedMessage combinedFeedMessage) {
List<OccurrenceModel> errorListE006 = new ArrayList<>();
List<OccurrenceModel> errorListE013 = new ArrayList<>();
List<OccurrenceModel> errorListW005 = new ArrayList<>();
List<OccurrenceModel> errorListE053 = new ArrayList<>();

for (GtfsRealtime.FeedEntity entity : feedMessage.getEntityList()) {
if (entity.hasTripUpdate()) {

GtfsRealtime.TripUpdate tripUpdate = entity.getTripUpdate();

if (gtfsMetadata.getExactTimesZeroTripIds().contains(tripUpdate.getTrip().getTripId())) {
/**
* NOTE - W006 checks for missing trip_ids, because we can't check for that here - we need the trip_id to know if it's exact_times=0
Expand All @@ -75,9 +83,25 @@ public List<ErrorListHelperModel> validate(long currentTimeMillis, GtfsMutableDa
// W005 - Missing vehicle_id in trip_update for frequency-based exact_times = 0
RuleUtils.addOccurrence(W005, "trip_id " + tripUpdate.getTrip().getTripId(), errorListW005, _log);
}

if (tripUpdate.hasVehicle() && tripUpdate.getVehicle().hasId() && previousTripUpdates.get(tripUpdate.getVehicle().getId()) != null) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking more about this in context of TripUpdates, I think that we need to use a composite key for the HashMap - vehicle_id + trip_id.

In a TripUpdates feed, you can have multiple TripUpdate records that have the same vehicle_id. For example, if a block is composed of two trips:

  • trip_A
  • trip_B

You can have a TripUpdate for trip_A and a TripUpdate for trip_B in the feed at the same time for the same vehicle if the same vehicle is serving both trips in the block.

All exact_times=0 trips that I've seen in real GTFS datasets have been loops, so they don't have the above issue (they just repeat the same trip_id over and over). But, I believe it's possible to have two trips defined in trips.txt and frequencies.txt that would have two different frequencies for exact_times=0 trips that would be served by the same vehicle back-to-back.

@scrudden Let me know what you think of the above.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can also have two trip updates in a single feed with the same trip id/vehicle id and different start times and be correct.

Previous updates vehicle 1001

Trip_A_08:00
Trip_A_09:00

Possible current updates vehicle 1001

1 Updates for same trips as last update

Trip_A_08:00
Trip_A_09:00

2 Trip_A_08:00 completed so has updated for next two trips

Trip_A_09:00
Trip_A_10:00

So we need logic for these two scenarios. 1 is simple as if all match then OK. 2 Trip_A_10:00 is OK but how do we know?

Do we check that the start time (10:00) is after the latest matching start_time that is in both feeds for this vehicle?

Copy link
Member

@barbeau barbeau Sep 19, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are 25 stops in a trip, and we assign a trip_instance value in this example just to keep track of them in our discussion, the TripUpdate data would probably look something like:

Previous updates vehicle 1001

Trip_A_08:00 - last stop_sequence = 20 (trip_instance=0)
Trip_A_09:00 - last stop_sequence = 10 (trip_instance=1)

Possible current updates vehicle 1001

1 Updates for same trips as last update

Trip_A_08:00 - last stop_sequence = 25 (trip_instance=0)
Trip_A_09:00 - last stop_sequence = 15 (trip_instance=1)

2 Trip_A_08:00 completed so has updated for next two trips

Trip_A_09:00 - last stop_sequence = 20 (trip_instance=1)
Trip_A_10:00 - last stop_sequence = 5 (trip_instance=2)


So for scenario 2, you'd be comparing:

  • Trip_A_09:00 - last stop_sequence = 20 (trip_instance=1) against
    • Trip_A_08:00 - last stop_sequence = 20 (trip_instance=0) // This would incorrectly log an error
    • Trip_A_09:00 - last stop_sequence = 10 (trip_instance=1) // This correctly does not log an error
  • Trip_A_10:00 - last stop_sequence = 5 (trip_instance=2) against
    • Trip_A_08:00 - last stop_sequence = 20 (trip_instance=0) // This correctly does not log an error
    • Trip_A_09:00 - last stop_sequence = 10 (trip_instance=1) // This correctly does not log an error

I think I got that right?

One issue with the above, though is that currently my suggestion of using vehicle_id + trip_id -> TripUpdate as a composite key would create collisions in a HashMap when executing scenario 2 (or any case where there are multiple trip_id + vehicle_ids in the previous message). So we'd have to create a HashMap of vehicle_id + trip_id -> List<TripUpdate> or something else to even be able to track this in code.

It would definitely be helpful to define these scenarios in the unit tests to see which ones we're covering and which we aren't. I'm fine with covering the cases we can, and then in the rule description acknowledging which cases we may have false positive or false negatives on.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See items changed in bold. The last stop sequence will be the last stop of the trip if there is an update for a subsequent trip.

Previous updates vehicle 1001

Trip_A_08:00 - last stop_sequence = 25 (trip_instance=0)
Trip_A_09:00 - last stop_sequence = 10 (trip_instance=1)

Possible current updates vehicle 1001

1 Updates for same trips as last update

Trip_A_08:00 - last stop_sequence = 25 (trip_instance=0)
Trip_A_09:00 - last stop_sequence = 15 (trip_instance=1)

2 Trip_A_08:00 completed so has updated for next two trips

Trip_A_09:00 - last stop_sequence = 25 (trip_instance=1)
Trip_A_10:00 - last stop_sequence = 5 (trip_instance=2)

I have already locally added a sorted list of trip updates to the HashMap for the current and previous messages (currently with a vehicle_id key but I will create a composite key with trip_id). I now need to consider the logic of the checks again. I think adding tests to match the above scenarios will be a start to this.

I'll check in what I have changed but it is very much a WIP.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The last stop sequence will be the last stop of the trip if there is an update for a subsequent trip.

That's not guaranteed, but I agree that will be typical behavior.

I have already locally added a sorted list of trip updates to the HashMap for the current and previous messages

For the strategy I was thinking of, I don't think we need a HashMap for the current message - we should just be able loop through the current message once. I'm not sure I understand why you're sorting by start_time, either - if possible I'd prefer to avoid sorts as we pay another runtime penalty of at least log(n).

I think adding tests to match the above scenarios will be a start to this.

I agree, that's the only way I'll be able to convince myself it works :). I had the same issue with some other tests that had a number of different permutations of field values.

I'll check in what I have changed but it is very much a WIP.

Ok, sounds good. Let me know when you want me to go through it again.

if (tripUpdate.getStopTimeUpdateCount() > 0 && previousTripUpdates.get(tripUpdate.getVehicle().getId()).getStopTimeUpdateCount() > 0) {
if (tripUpdate.getStopTimeUpdate(tripUpdate.getStopTimeUpdateCount() - 1).getStopSequence() >= previousTripUpdates.get(tripUpdate.getVehicle().getId()).getStopTimeUpdate(previousTripUpdates.get(tripUpdate.getVehicle().getId()).getStopTimeUpdateCount() - 1).getStopSequence()) {
// E053 - start time of trip not consistent for trip updates for frequency-based exact_times = 0
if (!tripUpdate.getTrip().getStartTime().equals(previousTripUpdates.get(tripUpdate.getVehicle().getId()).getTrip().getStartTime())) {

String errorMessage="vehicle_id " + tripUpdate.getVehicle().getId()+ " changed from "+previousTripUpdates.get(tripUpdate.getVehicle().getId()).getTrip().getStartTime() +" to " + tripUpdate.getTrip().getStartTime();
RuleUtils.addOccurrence(E053, errorMessage, errorListE053, _log);
}
}
}
}

// Need to store previous for checking E053. Not using previousFeedMessage on purpose.
if(tripUpdate.hasVehicle() && tripUpdate.getVehicle().hasId())
previousTripUpdates.put(tripUpdate.getVehicle().getId(), tripUpdate);
}
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because a VehiclePositions feed can contain a TripDescriptor too, we should also check for this same rule within the below if (entity.hasVehicle) { IF block.

See TripDescriptorValidator.java for examples of how I've factored out the code to methods for rules that are applied to TripUpdates and VehiclePositions entities.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking more, I think you'd need the current_stop_sequence field to be populated in order for this to be checked against a VehiclePosition entity...

if (entity.hasVehicle()) {
GtfsRealtime.VehiclePosition vehiclePosition = entity.getVehicle();
if (vehiclePosition.hasTrip() &&
Expand Down Expand Up @@ -119,6 +143,9 @@ public List<ErrorListHelperModel> validate(long currentTimeMillis, GtfsMutableDa
if (!errorListW005.isEmpty()) {
errors.add(new ErrorListHelperModel(new MessageLogModel(W005), errorListW005));
}
if (!errorListE053.isEmpty()) {
errors.add(new ErrorListHelperModel(new MessageLogModel(E053), errorListE053));
}
return errors;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,6 @@ public void testIsInFuture() {
@Test
public void testGetAllRules() {
List<ValidationRule> rules = ValidationRules.getRules();
assertEquals(61, rules.size());
assertEquals(62, rules.size());
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package edu.usf.cutr.gtfsrtvalidator.lib.test.rules;

import com.google.transit.realtime.GtfsRealtime;
import com.google.transit.realtime.GtfsRealtime.FeedMessage;

import edu.usf.cutr.gtfsrtvalidator.lib.model.ValidationRule;
import edu.usf.cutr.gtfsrtvalidator.lib.test.FeedMessageTest;
import edu.usf.cutr.gtfsrtvalidator.lib.test.util.TestUtils;
Expand Down Expand Up @@ -262,4 +264,123 @@ public void testW005() {

clearAndInitRequiredFeedFields();
}

/**
* E053- inconsistent start time in trip descriptor for frequency-based exact_times = 0
*/
@Test
public void testE053() {

FrequencyTypeZeroValidator frequencyTypeZeroValidator = new FrequencyTypeZeroValidator();
Map<ValidationRule, Integer> expected = new HashMap<>();
for (int i = 0; i < 2; i++) {
GtfsRealtime.TripDescriptor.Builder tripDescriptorBuilder = GtfsRealtime.TripDescriptor.newBuilder();

tripDescriptorBuilder.setTripId("1");
tripDescriptorBuilder.setStartDate("4-24-2016");

tripDescriptorBuilder.setStartTime("08:00:00AM");

GtfsRealtime.VehicleDescriptor.Builder vehicleDescriptorBuilder = GtfsRealtime.VehicleDescriptor.newBuilder();

vehicleDescriptorBuilder.setId("1");

vehiclePositionBuilder.setVehicle(vehicleDescriptorBuilder.build());
vehiclePositionBuilder.setTimestamp(TimestampUtils.MIN_POSIX_TIME);
vehiclePositionBuilder.setTrip(tripDescriptorBuilder.build());
vehiclePositionBuilder.setVehicle(vehicleDescriptorBuilder.build());

feedEntityBuilder.setVehicle(vehiclePositionBuilder.build());

feedMessageBuilder.setEntity(0, feedEntityBuilder.build());

tripUpdateBuilder.setVehicle(vehicleDescriptorBuilder.build());
tripUpdateBuilder.setTrip(tripDescriptorBuilder.build());

for (int j = 0; j < i + 1; j++) {
GtfsRealtime.TripUpdate.StopTimeUpdate.Builder stopTimeUpdateBuilder = GtfsRealtime.TripUpdate.StopTimeUpdate.newBuilder();

stopTimeUpdateBuilder.setStopId("" + j);
stopTimeUpdateBuilder.setStopSequence(j);

GtfsRealtime.TripUpdate.StopTimeEvent.Builder stopTimeEventBuilder = GtfsRealtime.TripUpdate.StopTimeEvent.newBuilder();
stopTimeUpdateBuilder.setArrival(stopTimeEventBuilder.build());

tripUpdateBuilder.addStopTimeUpdate(stopTimeUpdateBuilder.build());
}


feedEntityBuilder.setTripUpdate(tripUpdateBuilder.build());

// Add vehicle_id to vehicle position - 1 warning
feedEntityBuilder.setVehicle(vehiclePositionBuilder.build());


feedMessageBuilder.setEntity(0, feedEntityBuilder.build());

results = frequencyTypeZeroValidator.validate(TimestampUtils.MIN_POSIX_TIME, bullRunnerGtfs, bullRunnerGtfsMetadata, feedMessageBuilder.build(), null, null);


}
expected.clear();
TestUtils.assertResults(expected, results);

for (int i = 0; i < 2; i++) {
GtfsRealtime.TripDescriptor.Builder tripDescriptorBuilder = GtfsRealtime.TripDescriptor.newBuilder();

tripDescriptorBuilder.setTripId("1");
tripDescriptorBuilder.setStartDate("4-24-2016");
if (i == 0) {
tripDescriptorBuilder.setStartTime("08:00:00AM");
}
if (i == 1) {
tripDescriptorBuilder.setStartTime("09:00:00AM");
}


GtfsRealtime.VehicleDescriptor.Builder vehicleDescriptorBuilder = GtfsRealtime.VehicleDescriptor.newBuilder();

vehicleDescriptorBuilder.setId("1");

vehiclePositionBuilder.setVehicle(vehicleDescriptorBuilder.build());
vehiclePositionBuilder.setTimestamp(TimestampUtils.MIN_POSIX_TIME);
vehiclePositionBuilder.setTrip(tripDescriptorBuilder.build());
vehiclePositionBuilder.setVehicle(vehicleDescriptorBuilder.build());

feedEntityBuilder.setVehicle(vehiclePositionBuilder.build());

feedMessageBuilder.setEntity(0, feedEntityBuilder.build());

tripUpdateBuilder.setVehicle(vehicleDescriptorBuilder.build());
tripUpdateBuilder.setTrip(tripDescriptorBuilder.build());

for (int j = 0; j < i + 1; j++) {
GtfsRealtime.TripUpdate.StopTimeUpdate.Builder stopTimeUpdateBuilder = GtfsRealtime.TripUpdate.StopTimeUpdate.newBuilder();

stopTimeUpdateBuilder.setStopId("" + j);
stopTimeUpdateBuilder.setStopSequence(j);

GtfsRealtime.TripUpdate.StopTimeEvent.Builder stopTimeEventBuilder = GtfsRealtime.TripUpdate.StopTimeEvent.newBuilder();
stopTimeUpdateBuilder.setArrival(stopTimeEventBuilder.build());

tripUpdateBuilder.addStopTimeUpdate(stopTimeUpdateBuilder.build());
}


feedEntityBuilder.setTripUpdate(tripUpdateBuilder.build());

// Add vehicle_id to vehicle position - 1 warning
feedEntityBuilder.setVehicle(vehiclePositionBuilder.build());


feedMessageBuilder.setEntity(0, feedEntityBuilder.build());

results = frequencyTypeZeroValidator.validate(TimestampUtils.MIN_POSIX_TIME, bullRunnerGtfs, bullRunnerGtfsMetadata, feedMessageBuilder.build(), null, null);

}
expected.put(ValidationRules.E053, 1);
TestUtils.assertResults(expected, results);

clearAndInitRequiredFeedFields();
}
}