Skip to content

Commit d907bce

Browse files
committed
feat(compass-collection): LLM Output Validation - Mock Data Generator CLOUDP-333855
1 parent 9c656bb commit d907bce

File tree

2 files changed

+243
-44
lines changed

2 files changed

+243
-44
lines changed

packages/compass-collection/src/components/mock-data-generator-modal/mock-data-generator-modal.spec.tsx

Lines changed: 172 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,37 +16,42 @@ import type { CollectionState } from '../../modules/collection-tab';
1616
import { default as collectionTabReducer } from '../../modules/collection-tab';
1717
import type { ConnectionInfo } from '@mongodb-js/connection-info';
1818
import type { MockDataSchemaResponse } from '@mongodb-js/compass-generative-ai';
19+
import type { SchemaAnalysisState } from '../../schema-analysis-types';
20+
21+
/**
 * Baseline schema analysis state used by `renderModal` when a test does not
 * supply its own: a completed analysis containing a single `name` string field.
 */
const defaultSchemaAnalysisState: SchemaAnalysisState = {
  status: 'complete',
  processedSchema: {
    name: { type: 'String', probability: 1.0, sample_values: ['John', 'Jane'] },
  },
  sampleDocument: { name: 'John' },
  schemaMetadata: { maxNestingDepth: 1, validationRules: null },
};
1933

2034
describe('MockDataGeneratorModal', () => {
2135
async function renderModal({
2236
isOpen = true,
2337
currentStep = MockDataGeneratorStep.SCHEMA_CONFIRMATION,
2438
enableGenAISampleDocumentPassing = false,
2539
mockServices = createMockServices(),
40+
schemaAnalysis = defaultSchemaAnalysisState,
2641
connectionInfo,
2742
}: {
2843
isOpen?: boolean;
2944
enableGenAISampleDocumentPassing?: boolean;
3045
currentStep?: MockDataGeneratorStep;
3146
mockServices?: any;
3247
connectionInfo?: ConnectionInfo;
48+
schemaAnalysis?: SchemaAnalysisState;
3349
} = {}) {
3450
const initialState: CollectionState = {
3551
workspaceTabId: 'test-workspace-tab-id',
3652
namespace: 'test.collection',
3753
metadata: null,
38-
schemaAnalysis: {
39-
status: 'complete',
40-
processedSchema: {
41-
name: {
42-
type: 'String',
43-
probability: 1.0,
44-
sample_values: ['John', 'Jane'],
45-
},
46-
},
47-
sampleDocument: { name: 'John' },
48-
schemaMetadata: { maxNestingDepth: 1, validationRules: null },
49-
},
54+
schemaAnalysis,
5055
fakerSchemaGeneration: {
5156
status: 'idle',
5257
},
@@ -284,6 +289,33 @@ describe('MockDataGeneratorModal', () => {
284289
});
285290

286291
describe('on the schema editor step', () => {
292+
// Schema analysis fixture for the schema editor tests: four top-level fields,
// each declared with probability 1.0, on top of the default analysis state.
const mockSchemaAnalysis: SchemaAnalysisState = {
  ...defaultSchemaAnalysisState,
  processedSchema: {
    name: { type: 'String', probability: 1.0 },
    age: { type: 'Int32', probability: 1.0 },
    email: { type: 'String', probability: 1.0 },
    username: { type: 'String', probability: 1.0 },
  },
  // The sample document carries every field declared in processedSchema so
  // the fixture is internally consistent.
  sampleDocument: {
    name: 'Jane',
    age: 99,
    email: 'jane.doe@example.com',
    username: 'JaneDoe123',
  },
};
287319
const mockServicesWithMockDataResponse = createMockServices();
288320
mockServicesWithMockDataResponse.atlasAiService.getMockDataSchema = () =>
289321
Promise.resolve({
@@ -336,19 +368,22 @@ describe('MockDataGeneratorModal', () => {
336368
fields: [],
337369
},
338370
}),
339-
1000
371+
1
340372
)
341373
);
342374

343-
await renderModal();
375+
await renderModal({ mockServices });
344376

345377
// advance to the schema editor step
346378
userEvent.click(screen.getByText('Confirm'));
347379
expect(screen.getByTestId('faker-schema-editor-loader')).to.exist;
348380
});
349381

350382
it('shows the faker schema editor when the faker schema generation is completed', async () => {
351-
await renderModal({ mockServices: mockServicesWithMockDataResponse });
383+
await renderModal({
384+
mockServices: mockServicesWithMockDataResponse,
385+
schemaAnalysis: mockSchemaAnalysis,
386+
});
352387

353388
// advance to the schema editor step
354389
userEvent.click(screen.getByText('Confirm'));
@@ -359,7 +394,10 @@ describe('MockDataGeneratorModal', () => {
359394
});
360395

361396
it('shows correct values for the faker schema editor', async () => {
362-
await renderModal({ mockServices: mockServicesWithMockDataResponse });
397+
await renderModal({
398+
mockServices: mockServicesWithMockDataResponse,
399+
schemaAnalysis: mockSchemaAnalysis,
400+
});
363401

364402
// advance to the schema editor step
365403
userEvent.click(screen.getByText('Confirm'));
@@ -402,6 +440,124 @@ describe('MockDataGeneratorModal', () => {
402440
);
403441
});
404442

443+
it('does not show any fields that are not in the input schema', async () => {
  // The mocked AI response maps `name` and `email`; this test renders with the
  // default schema analysis, whose processedSchema only declares `name`.
  const mockServices = createMockServices();
  mockServices.atlasAiService.getMockDataSchema = async () => ({
    content: {
      fields: [
        {
          fieldPath: 'name',
          mongoType: 'string',
          fakerMethod: 'person.firstName',
          fakerArgs: [],
          isArray: false,
          probability: 1.0,
        },
        {
          fieldPath: 'email',
          mongoType: 'string',
          fakerMethod: 'internet.email',
          fakerArgs: [],
          isArray: false,
          probability: 1.0,
        },
      ],
    },
  });

  await renderModal({ mockServices });

  // advance to the schema editor step
  userEvent.click(screen.getByText('Confirm'));
  await waitFor(() => {
    expect(screen.getByTestId('faker-schema-editor')).to.exist;
  });

  // `name` is listed; `email` is not.
  expect(screen.getByText('name')).to.exist;
  expect(screen.queryByText('email')).to.not.exist;
});
482+
483+
it('shows unmapped fields as "Unrecognized"', async () => {
  // The mocked AI response maps `name` and `age` only; the input schema below
  // additionally declares `type`, which therefore has no faker mapping.
  const mockServices = createMockServices();
  mockServices.atlasAiService.getMockDataSchema = async () => ({
    content: {
      fields: [
        {
          fieldPath: 'name',
          mongoType: 'String',
          fakerMethod: 'person.firstName',
          fakerArgs: [],
          isArray: false,
          probability: 1.0,
        },
        {
          fieldPath: 'age',
          mongoType: 'Int32',
          fakerMethod: 'number.int',
          fakerArgs: [],
          isArray: false,
          probability: 1.0,
        },
      ],
    },
  });

  const schemaAnalysis: SchemaAnalysisState = {
    ...defaultSchemaAnalysisState,
    processedSchema: {
      name: { type: 'String', probability: 1.0 },
      age: { type: 'Int32', probability: 1.0 },
      type: {
        type: 'String',
        probability: 1.0,
        sample_values: ['cat', 'dog'],
      },
    },
    sampleDocument: { name: 'Peaches', age: 10, type: 'cat' },
  };

  await renderModal({ mockServices, schemaAnalysis });

  // advance to the schema editor step
  userEvent.click(screen.getByText('Confirm'));
  await waitFor(() => {
    expect(screen.getByTestId('faker-schema-editor')).to.exist;
  });

  // Select each field in turn and check what the editor displays for it.
  const expectedSelections = [
    { fieldPath: 'name', jsonType: 'String', fakerFunction: 'person.firstName' },
    { fieldPath: 'age', jsonType: 'Int32', fakerFunction: 'number.int' },
    { fieldPath: 'type', jsonType: 'String', fakerFunction: 'Unrecognized' },
  ];
  for (const { fieldPath, jsonType, fakerFunction } of expectedSelections) {
    userEvent.click(screen.getByText(fieldPath));
    expect(screen.getByLabelText('JSON Type')).to.have.value(jsonType);
    expect(screen.getByLabelText('Faker Function')).to.have.value(
      fakerFunction
    );
  }
});
560+
405561
it('disables the Next button when the faker schema mapping is not confirmed', async () => {
406562
await renderModal({
407563
mockServices: mockServicesWithMockDataResponse,

packages/compass-collection/src/modules/collection-tab.ts

Lines changed: 71 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,7 @@ import type { AtlasAiService } from '@mongodb-js/compass-generative-ai/provider'
1616
import type { experimentationServiceLocator } from '@mongodb-js/compass-telemetry/provider';
1717
import { type Logger, mongoLogId } from '@mongodb-js/compass-logging/provider';
1818
import { type PreferencesAccess } from 'compass-preferences-model/provider';
19-
import type {
20-
MockDataSchemaRequest,
21-
MockDataSchemaResponse,
22-
} from '@mongodb-js/compass-generative-ai';
19+
import type { MockDataSchemaRequest } from '@mongodb-js/compass-generative-ai';
2320
import { isInternalFieldPath } from 'hadron-document';
2421
import toNS from 'mongodb-ns';
2522
import {
@@ -699,32 +696,74 @@ export const cancelSchemaAnalysis = (): CollectionThunkAction<void> => {
699696
};
700697
};
701698

699+
/**
 * Validates a given faker schema against an input schema.
 *
 * - Drops faker mappings whose `fieldPath` is not an own key of the input
 *   schema (inherited keys such as `toString` never count as input fields).
 * - Validates the `fakerMethod` of each remaining mapping by resolving its
 *   dot-separated segments on `faker`; a method that does not resolve to a
 *   function is logged and replaced with `UNRECOGNIZED_FAKER_METHOD`.
 * - Appends a mapping with `UNRECOGNIZED_FAKER_METHOD` for every input schema
 *   field that has no surviving faker mapping.
 *
 * @param inputSchema - The schema definition for the input, mapping field names to their metadata.
 * @param fakerSchema - The array of faker schema mappings to validate and map.
 * @param logger - Logger instance used to log warnings for invalid faker methods.
 * @returns The validated faker mappings followed by the defaulted mappings for
 *   unmapped input fields, so every input schema field is represented.
 */
const validateFakerSchema = (
  inputSchema: Record<string, FieldInfo>,
  fakerSchema: FakerSchemaMapping[],
  logger: Logger
): FakerSchemaMapping[] => {
  // Segments that would resolve to inherited, non-faker members via `in`.
  const unsafeSegments = new Set(['__proto__', 'constructor', 'prototype']);

  // Own-property check: a plain truthiness lookup would also match keys
  // inherited from Object.prototype.
  const isInputField = (fieldPath: string): boolean =>
    Object.prototype.hasOwnProperty.call(inputSchema, fieldPath);

  // Walks `faker` segment by segment and reports whether the full path ends
  // on a function.
  const isKnownFakerMethod = (fakerMethod: string): boolean => {
    if (typeof fakerMethod !== 'string') {
      return false;
    }
    let methodRef: unknown = faker;
    for (const segment of fakerMethod.split('.')) {
      if (
        unsafeSegments.has(segment) ||
        typeof methodRef !== 'object' ||
        methodRef === null ||
        !(segment in methodRef)
      ) {
        return false;
      }
      methodRef = (methodRef as Record<string, unknown>)[segment];
    }
    return typeof methodRef === 'function';
  };

  const validatedFakerSchema = fakerSchema
    // Drop fields that don't match the input schema structure
    .filter((field) => isInputField(field.fieldPath))
    .map((field) => {
      const { fakerMethod } = field;
      if (isKnownFakerMethod(fakerMethod)) {
        return field;
      }

      logger.log.warn(
        mongoLogId(1_001_000_372),
        'Collection',
        'Invalid faker method',
        { fakerMethod }
      );
      return {
        ...field,
        fakerMethod: UNRECOGNIZED_FAKER_METHOD,
      };
    });

  // Set lookup keeps unmapped-field detection linear in the number of fields.
  const mappedFieldPaths = new Set(
    validatedFakerSchema.map(({ fieldPath }) => fieldPath)
  );

  // Default unmapped input fields to "Unrecognized" faker method
  const unmappedFields = Object.keys(inputSchema)
    .filter((fieldPath) => !mappedFieldPaths.has(fieldPath))
    .map((fieldPath) => ({
      fieldPath,
      fakerMethod: UNRECOGNIZED_FAKER_METHOD,
      mongoType: inputSchema[fieldPath].type,
      fakerArgs: [],
      probability: 1,
    }));

  return [...validatedFakerSchema, ...unmappedFields];
};
729768

730769
export const generateFakerMappings = (): CollectionThunkAction<
@@ -793,7 +832,11 @@ export const generateFakerMappings = (): CollectionThunkAction<
793832
connectionInfoRef.current
794833
);
795834

796-
const validatedFakerSchema = validateFakerSchema(response, logger);
835+
const validatedFakerSchema = validateFakerSchema(
836+
schemaAnalysis.processedSchema,
837+
response.content.fields,
838+
logger
839+
);
797840

798841
fakerSchemaGenerationAbortControllerRef.current = undefined;
799842
dispatch({

0 commit comments

Comments
 (0)