Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CSV Export] Add support for arbitrarily large exports #2125

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion firestore/firestore.indexes.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
{
"indexes": []
"indexes": [
{
"collectionGroup": "lois",
"queryScope": "COLLECTION",
"fields": [
{
"fieldPath": "`2`", // job_id
"order": "ASCENDING"
},
{
"fieldPath": "`1`", // id
"order": "ASCENDING"
}
]
},
{
"collectionGroup": "submissions",
"queryScope": "COLLECTION",
"fields": [
{
"fieldPath": "`4`", // job_id
"order": "ASCENDING"
},
{
"fieldPath": "`2`", // loi_id
"order": "ASCENDING"
}
]
}
]
}
97 changes: 47 additions & 50 deletions functions/src/common/datastore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import {GroundProtos} from '@ground/proto';

import Pb = GroundProtos.ground.v1beta1;
import {leftOuterJoinSorted, QueryIterator} from './query-iterator';

const l = registry.getFieldIds(Pb.LocationOfInterest);
const sb = registry.getFieldIds(Pb.Submission);

Expand All @@ -29,7 +31,7 @@
*/
type pseudoGeoJsonGeometry = {
type: string;
coordinates: any;

Check warning on line 34 in functions/src/common/datastore.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type
};

/**
Expand Down Expand Up @@ -132,66 +134,61 @@
return this.db_.doc(job(surveyId, jobId)).get();
}

fetchAccessibleSubmissionsByJobId(
surveyId: string,
jobId: string,
userId?: string
) {
if (!userId) {
return this.db_
.collection(submissions(surveyId))
.where(sb.jobId, '==', jobId)
.get();
} else {
return this.db_
.collection(submissions(surveyId))
.where(sb.jobId, '==', jobId)
.where(sb.ownerId, '==', userId)
.get();
}
}

fetchLocationOfInterest(surveyId: string, loiId: string) {
return this.fetchDoc_(loi(surveyId, loiId));
}

async fetchAccessibleLocationsOfInterestByJobId(
surveyId: string,
jobId: string,
userId?: string
): Promise<DocumentData[]> {
if (!userId) {
return (
await this.db_
.collection(lois(surveyId))
.where(l.jobId, '==', jobId)
.get()
).docs;
} else {
const importedLois = this.db_
.collection(lois(surveyId))
.where(l.jobId, '==', jobId)
.where(l.source, '==', Pb.LocationOfInterest.Source.IMPORTED);

const fieldDataLois = this.db_
.collection(lois(surveyId))
.where(l.jobId, '==', jobId)
.where(l.source, '==', Pb.LocationOfInterest.Source.FIELD_DATA)
.where(l.ownerId, '==', userId);

const [importedLoisSnapshot, fieldDataLoisSnapshot] = await Promise.all([
importedLois.get(),
fieldDataLois.get(),
]);

return [...importedLoisSnapshot.docs, ...fieldDataLoisSnapshot.docs];
}
fetchLocationsOfInterest(surveyId: string, jobId: string) {
return this.db_
.collection(lois(surveyId))
.where(l.jobId, '==', jobId)
.get();
}

fetchSheetsConfig(surveyId: string) {
return this.fetchDoc_(`${survey(surveyId)}/sheets/config`);
}

/**
* Fetches Location of Interests (LOIs) and their associated submissions for a given survey and job.
*
* @param surveyId The ID of the survey.
* @param jobId The ID of the job.
* @param ownerId The optional ID of the owner to filter submissions by.
* @param page The page number for pagination (used with the `QueryIterator`).
* @returns A Promise that resolves to an array of joined LOI and submission documents.
*/
async fetchLoisSubmissions(
rfontanarosa marked this conversation as resolved.
Show resolved Hide resolved
surveyId: string,
jobId: string,
ownerId: string | undefined,
page: number
) {
const loisQuery = this.db_
.collection(lois(surveyId))
.where(l.jobId, '==', jobId)
.orderBy(l.id);
let submissionsQuery = this.db_
.collection(submissions(surveyId))
.where(sb.jobId, '==', jobId)
.orderBy(sb.loiId);
if (ownerId) {
submissionsQuery = submissionsQuery.where(sb.ownerId, '==', ownerId);
}
const loisIterator = new QueryIterator(loisQuery, page, l.id);
const submissionsIterator = new QueryIterator(
submissionsQuery,
page,
sb.loiId
);
return leftOuterJoinSorted(
loisIterator,
loiDoc => loiDoc.get(l.id),
submissionsIterator,
submissionDoc => submissionDoc.get(sb.loiId)
);
}

async insertLocationOfInterest(surveyId: string, loiDoc: DocumentData) {
await this.db_.doc(survey(surveyId)).collection('lois').add(loiDoc);
}
Expand Down Expand Up @@ -220,7 +217,7 @@
await loiRef.update({[l.properties]: loiDoc[l.properties]});
}

static toFirestoreMap(geometry: any) {

Check warning on line 220 in functions/src/common/datastore.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type
return Object.fromEntries(
Object.entries(geometry).map(([key, value]) => [
key,
Expand All @@ -229,7 +226,7 @@
);
}

static toFirestoreValue(value: any): any {

Check warning on line 229 in functions/src/common/datastore.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type

Check warning on line 229 in functions/src/common/datastore.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type
if (value === null) {
return null;
}
Expand Down Expand Up @@ -257,7 +254,7 @@
*
* @returns GeoJSON geometry object (with geometry as list of lists)
*/
static fromFirestoreMap(geoJsonGeometry: any): any {

Check warning on line 257 in functions/src/common/datastore.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type

Check warning on line 257 in functions/src/common/datastore.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type
const geometryObject = geoJsonGeometry as pseudoGeoJsonGeometry;
if (!geometryObject) {
throw new Error(
Expand All @@ -272,7 +269,7 @@
return geometryObject;
}

static fromFirestoreValue(coordinates: any) {

Check warning on line 272 in functions/src/common/datastore.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type
if (coordinates instanceof GeoPoint) {
// Note: GeoJSON coordinates are in lng-lat order.
return [coordinates.longitude, coordinates.latitude];
Expand All @@ -281,7 +278,7 @@
if (typeof coordinates !== 'object') {
return coordinates;
}
const result = new Array<any>(coordinates.length);

Check warning on line 281 in functions/src/common/datastore.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type

Object.entries(coordinates).map(([i, nestedValue]) => {
const index = Number.parseInt(i);
Expand Down
148 changes: 148 additions & 0 deletions functions/src/common/query-iterator.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/**
* Copyright 2024 The Ground Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import {
Query,
QueryDocumentSnapshot,
QuerySnapshot,
} from 'firebase-admin/firestore';

/**
* An asynchronous iterator class that allows for iterating over the results of a Firestore query in batches.
*/
export class QueryIterator implements AsyncIterator<QueryDocumentSnapshot> {
rfontanarosa marked this conversation as resolved.
Show resolved Hide resolved
private querySnapshot: QuerySnapshot | null = null;
private currentIndex = 0;
private lastDocument: QueryDocumentSnapshot | null = null;

/**
* Creates a new QueryIterator.
*
* @param query The Firestore query to iterate over.
* @param pageSize The number of documents to fetch in each batch.
* @param orderField The field to order documents by (optional).
*/
constructor(
private query: Query,
private pageSize: number,
private orderField: string
) {}

/**
* Fetches the next batch of documents and returns the next document in the iterator.
*
* @returns A promise that resolves to an `IteratorResult` object. The `value` property
* will be the next `QueryDocumentSnapshot` if there are more documents, or `undefined`
* if there are no more documents. The `done` property indicates whether there are
* more documents to iterate over.
*/
async next(): Promise<IteratorResult<QueryDocumentSnapshot>> {
if (
this.querySnapshot === null ||
this.currentIndex >= this.querySnapshot.size
) {
// Fetch next batch of documents
let q = this.query.limit(this.pageSize);
if (this.lastDocument) {
q = q.startAfter([this.lastDocument?.get(this.orderField)]);
}
this.querySnapshot = await q.get();
this.currentIndex = 0;
}
if (this.querySnapshot.size > 0) {
const document = this.querySnapshot.docs[this.currentIndex++];
this.lastDocument = document; // Update last document for next batch
return {
value: document,
done: false,
};
} else {
return {
value: undefined,
done: true,
};
}
}
}

/**
* Performs a left outer join operation on two asynchronous iterators with sorting.
*
* This function iterates through an asynchronous iterator of left elements (`leftIterator`)
* and an asynchronous iterator of right elements (`rightIterator`). It performs a left outer join
* based on the keys extracted from each element using the provided functions `getLeftKey` and `getRightKey`.
*
* The function yields pairs of elements from the left and right iterators. If there's no matching element
* from the right iterator for a left element, the function yields a pair with the left element's value and
* `undefined` for the right element (left outer join behavior).
*
* Both iterators are expected to be sorted by their respective keys for optimal performance.
*
* @template T The type of elements in the left iterator.
* @template U The type of elements in the right iterator.
*
* @param leftIterator The asynchronous iterator of left elements.
* @param getLeftKey A function that extracts the key for comparison from a left element.
* @param rightIterator The asynchronous iterator of right elements.
* @param getRightKey A function that extracts the key for comparison from a right element.
*
* @returns An asynchronous generator that yields pairs of elements from the left and right iterators.
* Each pair is an array containing the left element's value and the matching right element's value
* (or `undefined` if no match is found).
*/
export async function* leftOuterJoinSorted<T, U>(
leftIterator: AsyncIterator<T>,
getLeftKey: (left: T) => any,

Check warning on line 108 in functions/src/common/query-iterator.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type
rightIterator: AsyncIterator<U>,
getRightKey: (right: U) => any

Check warning on line 110 in functions/src/common/query-iterator.ts

View workflow job for this annotation

GitHub Actions / Check

Unexpected any. Specify a different type
): AsyncGenerator<[T, U | undefined]> {
let leftItem = await leftIterator.next();
let rightItem = await rightIterator.next();
let rightItemsFound = 0;

// This loop iterates through the left iterator until it's exhausted.
// In each iteration, it compares the current left item's key with the current
// right item's key (if there's a right item remaining).
while (!leftItem.done) {
const leftKey = getLeftKey(leftItem.value);
const rightKey = rightItem.done ? undefined : getRightKey(rightItem.value);
// Check for these conditions:
// 1. Right iterator is done (no more items on the right).
// 2. Left item's key is less than the right item's key (mismatch).
if (rightItem.done || leftKey < rightKey) {
// The left item has no matching item on the right (or right iterator is done).
// If no matching items were found on the right side for the current left item
// (or the right iterator has reached its end), yield a pair
// consisting of the left item's value and undefined.
if (rightItemsFound === 0) yield [leftItem.value, undefined];
// Move to the next left item and reset the counter for matches.
leftItem = await leftIterator.next();
rightItemsFound = 0;
} else if (leftKey > rightKey) {
// The right item's key is less than the left item's key (mismatch).
// Advance the right iterator to find a possible match for the current left item.
rightItem = await rightIterator.next();
} else {
// Match found! The keys of the left and right items are equal.
// Increment the counter for the number of matches found for the current left item.
rightItemsFound++;
// Yield a pair with the left item's value, the matching right item's value,
// and the current count of matches for the left item.
yield [leftItem.value, rightItem.value];
rightItem = await rightIterator.next();
}
}
}
Loading
Loading