AllenInstitute · pgarrison · May 28, 2026 · May 28, 2026 · May 28, 2026 · May 29, 2026
@@ -22,6 +22,19 @@ const PRE_DEFINED_COLUMNS = Object.values(PreDefinedColumn);
 
 const DATA_SOURCE_COLUMN = "Data source";
 
+// Suffix appended to every DuckDB file-handle name so that a short name like
+// "foo" can never prefix-match a longer name like "foo2".
+// See https://github.com/duckdb/duckdb-wasm/issues/2227
+//
+// This is a workaround, not a complete fix: a collision is still possible when
+// one source name starts with another source name plus this suffix (e.g.
+// "foo" and "foo-bff-filehandle.parquet"). A proper fix requires an upstream
+// change in duckdb-wasm to use exact-match lookups for registered file handles.
+const FILE_HANDLE_SUFFIX = "-bff-filehandle";
+/**
+ * Builds the DuckDB file-handle name for the data source `name` by appending
+ * FILE_HANDLE_SUFFIX to it.
+ */
+function fileHandleName(name: string): string {
+    return `${name}${FILE_HANDLE_SUFFIX}`;
+}
+
 // Map each actual column name to the predefined column name when they fuzzy-match.
 function getActualToPreDefinedColumnMap(columns: string[]): Map<string, string> {
     const map = new Map<string, string>();
@@ -213,9 +226,11 @@ export default abstract class DatabaseService {
             throw new Error("Database failed to initialize");
         }
 
+        const handle = fileHandleName(name);
+
         if (uri instanceof File) {
             await this.database.registerFileHandle(
-                name,
+                handle,
                 uri,
                 duckdb.DuckDBDataProtocol.BROWSER_FILEREADER,
                 true
@@ -225,29 +240,29 @@ export default abstract class DatabaseService {
                 ? duckdb.DuckDBDataProtocol.S3
                 : duckdb.DuckDBDataProtocol.HTTP;
 
-            await this.database.registerFileURL(name, uri, protocol, false);
+            await this.database.registerFileURL(handle, uri, protocol, false);
         }
 
         if (type === "parquet") {
             await this.createParquetDirectView(name);
         } else if (type === "json") {
-            await this.execute(`CREATE TABLE "${name}" AS FROM read_json_auto('${name}');`);
+            await this.execute(`CREATE TABLE "${name}" AS FROM read_json_auto('${handle}');`);
         } else {
             // Default to CSV. Use sample_size=-1 to scan the full file before deciding column
             // types, eliminating "first N rows look numeric, later rows have strings" failures.
             // Fall back to all_varchar=true if type inference fails (e.g. truly mixed-type column)
             // so the file always loads successfully.
             try {
                 await this.execute(
-                    `CREATE TABLE "${name}" AS FROM read_csv_auto('${name}', header=true, sample_size=-1);`
+                    `CREATE TABLE "${name}" AS FROM read_csv_auto('${handle}', header=true, sample_size=-1);`
                 );
             } catch {
                 console.warn(
                     `Failed to infer column types for CSV "${name}"; falling back to all_varchar=true. All columns will be loaded as strings.`
                 );
                 await this.execute(`DROP TABLE IF EXISTS "${name}"`);
                 await this.execute(
-                    `CREATE TABLE "${name}" AS FROM read_csv_auto('${name}', header=true, all_varchar=true);`
+                    `CREATE TABLE "${name}" AS FROM read_csv_auto('${handle}', header=true, all_varchar=true);`
                 );
             }
         }
@@ -798,7 +813,7 @@ export default abstract class DatabaseService {
             selectParts.push(`"filename" AS "${DATA_SOURCE_COLUMN}"`);
         }
         // 4. Create the view for this data source
-        const quotedNames = sourceNames.map((name) => `'${name}'`).join(", ");
+        const quotedNames = sourceNames.map((name) => `'${fileHandleName(name)}'`).join(", ");
         const createViewSql = `CREATE VIEW "${aggregateName}"
             AS SELECT ${selectParts.join(", ")}
             FROM parquet_scan(ARRAY[${quotedNames}], union_by_name = true);`;
@@ -1187,7 +1202,7 @@ export default abstract class DatabaseService {
     // Similar to getColumnsOnDataSource below, but suitable for use during the
     // data source preparation step.
     //
     // Issues a DESCRIBE over a parquet_scan of the given data source and returns
     // the "column_name" value of every row in the result, in result order.
     private async getRawParquetColumns(name: string): Promise<string[]> {
-        const sql = `DESCRIBE SELECT * FROM parquet_scan("${name}")`;
+        const sql = `DESCRIBE SELECT * FROM parquet_scan("${fileHandleName(name)}")`;
         const rows = await this.query(sql).promise;
         return rows.map((row) => row["column_name"] as string);
     }

@@ -302,7 +302,7 @@ describe("DatabaseService", () => {
 
             public query(sql: string): { promise: Promise<{ [key: string]: any }[]> } {
                 const parquetDescribeMatch = sql.match(
-                    /DESCRIBE SELECT \* FROM parquet_scan\("(.+)"\)/
+                    /DESCRIBE SELECT \* FROM parquet_scan\("(.+)-bff-filehandle"\)/
                 );
                 if (parquetDescribeMatch) {
                     const sourceName = parquetDescribeMatch[1];
@@ -351,6 +351,27 @@ describe("DatabaseService", () => {
             );
         });
 
+        it("uses suffixed file handle names in parquet_scan to avoid prefix collisions", async () => {
+            // Regression guard for duckdb-wasm#2227, where a handle registered as
+            // "foo" is prefix-matched against one registered as "foo2". Only the
+            // generated SQL is inspected here; whether the collision is really
+            // avoided can only be checked against DuckDB-wasm itself.
+            const service = new MockAggregateParquetDatabaseService({
+                foo: ["file_path"],
+                foo2: ["file_path"],
+            });
+            await service.prepareDataSources([
+                { name: "foo", type: "parquet", uri: "https://example.com/foo.parquet" },
+                { name: "foo2", type: "parquet", uri: "https://example.com/foo2.parquet" },
+            ]);
+
+            const viewStatement = service.executedSQL.find((statement) =>
+                statement.includes("CREATE VIEW")
+            );
+            expect(viewStatement).to.not.be.undefined;
+
+            // Both sources must appear in the scan list under their suffixed names.
+            const expectedHandlePatterns = [
+                /parquet_scan\(ARRAY\[.*'foo-bff-filehandle'.*]/,
+                /parquet_scan\(ARRAY\[.*'foo2-bff-filehandle'.*]/,
+            ];
+            for (const pattern of expectedHandlePatterns) {
+                expect(viewStatement).to.match(pattern);
+            }
+        });
+
         it("creates aggregate parquet view using union_by_name and data source projection", async () => {
             const service = new MockAggregateParquetDatabaseService({
                 "a.parquet": ["file_path"],