Skip to content

Commit eadd94b

Browse files
committed
WIP
1 parent b4ecf93 commit eadd94b

File tree

2 files changed

+94
-42
lines changed

2 files changed

+94
-42
lines changed

packages/compass-collection/src/components/mock-data-generator-modal/types.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ type MockDataGeneratorInProgressState = {
1919

2020
type MockDataGeneratorCompletedState = {
2121
status: 'completed';
22-
fakerSchema: FakerSchemaMapping[];
22+
fakerSchema: FakerSchema;
2323
requestId: string;
2424
};
2525

@@ -39,3 +39,12 @@ export type FakerSchemaMapping = Omit<
3939
MockDataSchemaResponse['content']['fields'][number],
4040
'isArray'
4141
>;
42+
43+
/**
 * Faker configuration for a single schema field, without the field path
 * (the path is used as the key of `FakerSchema` instead).
 *
 * - `mongoType`: type name of the field.
 * - `fakerMethod`: faker method name as a string.
 * - `fakerArgs`: arguments associated with `fakerMethod`.
 * - `probability`: numeric weight for the field.
 *   NOTE(review): presumably the likelihood that the field is generated —
 *   confirm against the consumer of this type.
 */
export type FakerFieldMapping = {
44+
mongoType: string;
45+
fakerMethod: string;
46+
fakerArgs: any[];
47+
probability: number;
48+
};
49+
50+
/** Faker field mappings keyed by a string (the field path). */
export type FakerSchema = Record<string, FakerFieldMapping>;

packages/compass-collection/src/modules/collection-tab.ts

Lines changed: 84 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ import type { Document, MongoError } from 'mongodb';
3737
import { MockDataGeneratorStep } from '../components/mock-data-generator-modal/types';
3838
import type {
3939
FakerSchemaMapping,
40+
FakerSchema,
4041
MockDataGeneratorState,
4142
} from '../components/mock-data-generator-modal/types';
4243

43-
// @ts-expect-error TypeScript warns us about importing ESM module from CommonJS module, but we can ignore since this code will be consumed by webpack.
4444
import { faker } from '@faker-js/faker/locale/en';
4545

4646
const DEFAULT_SAMPLE_SIZE = 100;
@@ -182,7 +182,7 @@ export interface FakerMappingGenerationStartedAction {
182182

183183
export interface FakerMappingGenerationCompletedAction {
184184
type: CollectionActions.FakerMappingGenerationCompleted;
185-
fakerSchema: FakerSchemaMapping[];
185+
fakerSchema: FakerSchema;
186186
requestId: string;
187187
}
188188

@@ -696,64 +696,107 @@ export const cancelSchemaAnalysis = (): CollectionThunkAction<void> => {
696696
};
697697
};
698698

699+
/**
 * Transforms the LLM array format into a keyed object structure.
 * Moves `fieldPath` from an object property to the object key; when the same
 * `fieldPath` appears more than once, the last entry wins.
 *
 * The returned object has a null prototype. Field paths come from an LLM
 * response, so a path such as `__proto__` must become an ordinary key instead
 * of replacing the prototype, and lookups for paths such as `constructor` or
 * `toString` must yield `undefined` instead of an inherited
 * `Object.prototype` member.
 *
 * @param fakerSchema - The array of faker schema mappings to re-key.
 * @returns The mappings keyed by field path, without the `fieldPath` property.
 */
function transformFakerSchemaToObject(
  fakerSchema: FakerSchemaMapping[]
): FakerSchema {
  // Null prototype: no inherited keys and no `__proto__` setter to trip over.
  const result = Object.create(null) as FakerSchema;

  for (const { fieldPath, ...fieldMapping } of fakerSchema) {
    result[fieldPath] = fieldMapping;
  }

  return result;
}
715+
716+
/**
 * Checks if the method exists and is callable on the faker object.
 *
 * The input must have the exact form `module.method`. Names that exist on
 * `Object.prototype` (`constructor`, `toString`, `__proto__`, ...) are
 * rejected up front: they resolve to callable inherited members on any
 * object, so without this guard strings such as `person.toString` or
 * `__proto__.toString` would be accepted as faker methods.
 *
 * @param fakerMethod - Candidate faker method in `module.method` form.
 * @returns `true` only when `faker[module]` is an object and
 *   `faker[module][method]` is a function.
 */
function isValidFakerMethod(fakerMethod: string): boolean {
  const parts = fakerMethod.split('.');

  // Validate format: exactly module.method
  if (parts.length !== 2) {
    return false;
  }

  const [moduleName, methodName] = parts;

  // Reject inherited Object.prototype members for both segments.
  if (moduleName in Object.prototype || methodName in Object.prototype) {
    return false;
  }

  try {
    const fakerModule = (faker as unknown as Record<string, unknown>)[
      moduleName
    ];
    return (
      typeof fakerModule === 'object' &&
      fakerModule !== null &&
      typeof (fakerModule as Record<string, unknown>)[methodName] === 'function'
    );
  } catch {
    // A throwing property access means the method is not usable.
    return false;
  }
}
743+
699744
/**
 * Validates a given faker schema against an input schema.
 *
 * - Transforms the LLM array format to a keyed object structure
 * - Validates the `fakerMethod` for each field, marking it as unrecognized if invalid
 * - Adds any unmapped input schema fields to the result with an unrecognized faker method
 * - Ignores faker mappings whose field path is not present in the input schema
 *
 * @param inputSchema - The schema definition for the input, mapping field names to their metadata.
 * @param fakerSchemaArray - The array of faker schema mappings from LLM to validate and map.
 * @param logger - Logger instance used to log warnings for invalid faker methods.
 * @returns A keyed object of validated faker schema mappings, with exactly one entry per input schema field.
 */
const validateFakerSchema = (
  inputSchema: Record<string, FieldInfo>,
  fakerSchemaArray: FakerSchemaMapping[],
  logger: Logger
): FakerSchema => {
  // Transform to keyed object structure
  const fakerSchemaRaw = transformFakerSchemaToObject(fakerSchemaArray);

  const result: FakerSchema = {};

  // Process every input schema field in a single pass
  for (const fieldPath of Object.keys(inputSchema)) {
    // Own-property check: a field named e.g. `constructor` or `toString`
    // must not pick up an inherited Object.prototype member as its mapping.
    const fakerMapping = Object.prototype.hasOwnProperty.call(
      fakerSchemaRaw,
      fieldPath
    )
      ? fakerSchemaRaw[fieldPath]
      : undefined;

    if (!fakerMapping) {
      // Field not mapped by LLM - add default
      result[fieldPath] = {
        mongoType: inputSchema[fieldPath].type,
        fakerMethod: UNRECOGNIZED_FAKER_METHOD,
        fakerArgs: [],
        probability: 1,
      };
      continue;
    }

    if (isValidFakerMethod(fakerMapping.fakerMethod)) {
      result[fieldPath] = fakerMapping;
      continue;
    }

    logger.log.warn(
      mongoLogId(1_001_000_372),
      'Collection',
      'Invalid faker method',
      { fakerMethod: fakerMapping.fakerMethod }
    );
    // Keep the LLM-provided metadata but flag the method as unrecognized.
    result[fieldPath] = {
      mongoType: fakerMapping.mongoType,
      fakerMethod: UNRECOGNIZED_FAKER_METHOD,
      fakerArgs: fakerMapping.fakerArgs,
      probability: fakerMapping.probability,
    };
  }

  return result;
};
758801

759802
export const generateFakerMappings = (): CollectionThunkAction<

0 commit comments

Comments
 (0)