Skip to content
62 changes: 37 additions & 25 deletions integration_test/gce-testing-internal/gce/gce_testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -604,11 +604,10 @@ func AssertMetricMissing(ctx context.Context, logger *log.Logger, vm *VM, metric
return nil
}

// hasMatchingLog looks in the logging backend for a log matching the given query,
// findMatchingLogs looks in the logging backend for logs matching the given query,
// over the trailing time interval specified by the given window.
// Returns a boolean indicating whether the log was present in the backend,
// plus the first log entry found, or an error if the lookup failed.
func hasMatchingLog(ctx context.Context, logger *log.Logger, vm *VM, logNameRegex string, window time.Duration, query string) (bool, *cloudlogging.Entry, error) {
// Returns all the matching log entries found, or an error if the lookup failed.
func findMatchingLogs(ctx context.Context, logger *log.Logger, vm *VM, logNameRegex string, window time.Duration, query string) ([]*cloudlogging.Entry, error) {
start := time.Now().Add(-window)

t := start.Format(time.RFC3339)
Expand All @@ -620,29 +619,25 @@ func hasMatchingLog(ctx context.Context, logger *log.Logger, vm *VM, logNameRege

logClient, err := logClients.new(vm.Project)
if err != nil {
return false, nil, fmt.Errorf("hasMatchingLog() failed to obtain logClient for project %v: %v", vm.Project, err)
return nil, fmt.Errorf("findMatchingLogs() failed to obtain logClient for project %v: %v", vm.Project, err)
}
it := logClient.Entries(ctx, logadmin.Filter(filter))
found := false

var first *cloudlogging.Entry
// Loop through the iterator printing out each matching log entry. We could return true on the
// first match, but it's nice for debugging to print out all matches into the logs.
var matchingLogs []*cloudlogging.Entry
// Loop through the iterator printing out each matching log entry.
// It is helpful for debugging to print out all matches into the logs.
for {
entry, err := it.Next()
if err == iterator.Done {
break
}
if err != nil {
return false, nil, err
return nil, err
}
logger.Printf("Found matching log entry: %v", entry)
found = true
if first == nil {
first = entry
}
matchingLogs = append(matchingLogs, entry)
}
return found, first, nil
return matchingLogs, nil
}

// WaitForLog looks in the logging backend for a log matching the given query,
Expand All @@ -660,42 +655,59 @@ func shouldRetryHasMatchingLog(err error) bool {
strings.Contains(err.Error(), "Internal error encountered")
}

// QueryLog looks in the logging backend for a log matching the given query,
// QueryLog looks in the logging backend a log matching the given query,
// over the trailing time interval specified by the given window.
// Returns the first log entry found, or an error if the log could not be
// found after some retries.
func QueryLog(ctx context.Context, logger *log.Logger, vm *VM, logNameRegex string, window time.Duration, query string, maxAttempts int) (*cloudlogging.Entry, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be able to implement QueryLog by just calling QueryAllLogs internally and returning the first element of the slice.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

matchingLogs, err := QueryAllLogs(ctx, logger, vm, logNameRegex, window, query, maxAttempts)
if err != nil {
return nil, err
}
if len(matchingLogs) > 0 {
return matchingLogs[0], nil
}
return nil, fmt.Errorf("QueryLog() failed: %s not found, exhausted retries", logNameRegex)
}

// QueryAllLog looks in the logging backend for logs matching the given query,
// over the trailing time interval specified by the given window.
// Returns all the log entries found, or an error if no log could not be
// found after some retries.
func QueryAllLogs(ctx context.Context, logger *log.Logger, vm *VM, logNameRegex string, window time.Duration, query string, maxAttempts int) ([]*cloudlogging.Entry, error) {
Copy link
Member

@jefferbrecht jefferbrecht Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

QueryAllLogs arguably does not need the "retry until at least one log is found" logic (although the "retry on retriable failure" logic should stay). IMO that's only there for "get me the first matching log ASAP" (which is just QueryLog), whereas here the caller is more likely to be looking for "get me all matching logs from the given time window" (after they've waited an appropriate amount of time for the eventual consistency of the backend).

Copy link
Contributor Author

@franciscovalentecastro franciscovalentecastro Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see how to remove "retry until at least one log is found" from QueryAllLogs without having some downsides.

@jefferbrecht Could you add more details in the expected solution here ?

IMO that's only there for "get me the first matching log ASAP"

I think it could also be thought as "return query result if you find any data". It has the additional value of shorter tests.

QueryAllLogs arguably does not need the "retry until at least one log is found" logic (although the "retry on retriable failure" logic should stay).

Yes, "get me all matching logs from the given time window" maybe the most likely use case of QueryAllLogs, but i don't see an alternative way of "stopping the lookup" that is more helpful for testing.

Alternatives :

  • [Current] Stops querying the backend if any data is found (len(matchingLogs) > 0).
    • Shorter tests.
    • Caller is expecting to see "all found data".
  • Stop when err == nil
    • Shorter tests. May result in "no data found".
    • No consideration for race conditions when log may take longer to be ingested.
  • Create an expectedLogCount and stop when len(matchingLogs) >= expectedLogCount.
    • Same as the previous one, but more restrictive.
  • Remove the "stop after data is found" will result in the always querying "maxAttempts".
    • The tests is long (maxAttempt * backoffTime) and if the time window is small (e.g. 1 minute), the last query could return less data after backoff retries.

Copy link
Contributor Author

@franciscovalentecastro franciscovalentecastro Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On further thought, i still think stopping the query at len(matchingLogs) > 0 is the best option since its the most aligned with how the other QueryLog and WaitForLog functions have the expectation of finding data (at least one log).

To expectzero logs we can use AssertLogMissing.

for attempt := 1; attempt <= maxAttempts; attempt++ {
found, first, err := hasMatchingLog(ctx, logger, vm, logNameRegex, window, query)
if found {
// Success.
return first, nil
matchingLogs, err := findMatchingLogs(ctx, logger, vm, logNameRegex, window, query)
if err == nil {
if len(matchingLogs) > 0 {
// Success.
return matchingLogs, nil
}
}
logger.Printf("Query returned found=%v, err=%v, attempt=%d", found, err, attempt)
logger.Printf("Query returned matchingLogs=%v, err=%v, attempt=%d", matchingLogs, err, attempt)
if err != nil && !shouldRetryHasMatchingLog(err) {
// A non-retryable error.
return nil, fmt.Errorf("QueryLog() failed: %v", err)
}
// found was false, or we hit a retryable error.
time.Sleep(logQueryBackoffDuration)
}
return nil, fmt.Errorf("QueryLog() failed: %s not found, exhausted retries", logNameRegex)
return nil, fmt.Errorf("QueryAllLogs() failed: %s not found, exhausted retries", logNameRegex)
}

// AssertLogMissing looks in the logging backend for a log matching the given query
// and returns success if no data is found. To consider possible transient errors
// while querying the backend we make queryMaxAttemptsLogMissing query attempts.
func AssertLogMissing(ctx context.Context, logger *log.Logger, vm *VM, logNameRegex string, window time.Duration, query string) error {
for attempt := 1; attempt <= queryMaxAttemptsLogMissing; attempt++ {
found, _, err := hasMatchingLog(ctx, logger, vm, logNameRegex, window, query)
matchingLogs, err := findMatchingLogs(ctx, logger, vm, logNameRegex, window, query)
if err == nil {
if found {
if len(matchingLogs) > 0 {
return fmt.Errorf("AssertLogMissing(log=%q): %v failed: unexpectedly found data for log", query, err)
}
// Success
return nil
}
logger.Printf("Query returned found=%v, err=%v, attempt=%d", found, err, attempt)
logger.Printf("Query returned matchingLogs=%v, err=%v, attempt=%d", matchingLogs, err, attempt)
if err != nil && !shouldRetryHasMatchingLog(err) {
// A non-retryable error.
return fmt.Errorf("AssertLogMissing() failed: %v", err)
Expand Down
Loading