Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Automatic Import] Safely access non-identifier fields in Painless if context #205220

Merged
merged 9 commits into from
Jan 3, 2025
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ describe('Testing pipeline templates', () => {
target_field: '@timestamp',
formats: ["yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS'Z'", 'ISO8601'],
tag: 'date_processor_xdfsfs.ds.@timestamp',
if: 'ctx.xdfsfs?.ds?.@timestamp != null',
if: 'ctx.xdfsfs?.ds?.get("@timestamp") != null',
},
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@ import { Pipeline, ESProcessorItem } from '../../../common';
import type { EcsMappingState } from '../../types';
import { ECS_TYPES } from './constants';
import { deepCopy } from '../../util/util';

interface IngestPipeline {
[key: string]: unknown;
}
import { type FieldPath, fieldPathToProcessorString } from '../../util/fields';
import { fieldPathToPainlessExpression, SafePainlessExpression } from '../../util/painless';

interface ECSField {
target: string;
Expand All @@ -24,16 +22,36 @@ interface ECSField {
type: string;
}

const KNOWN_ES_TYPES = ['long', 'float', 'scaled_float', 'ip', 'boolean', 'keyword'];
type KnownESType = (typeof KNOWN_ES_TYPES)[number];

/**
* Clarifies the types of specific fields in pipeline processors.
*
* This includes safety requirements for Painless script fields.
* Restricted to the processors that we generate in this file.
*/
interface SafeESProcessorItem extends ESProcessorItem {
[k: string]: {
field?: string;
if?: SafePainlessExpression;
ignore_missing?: boolean;
target_field?: string;
type?: KnownESType;
formats?: string[];
};
}

function generateProcessor(
currentPath: string,
currentPath: FieldPath,
ecsField: ECSField,
expectedEcsType: string,
sampleValue: unknown
): object {
): SafeESProcessorItem {
if (needsTypeConversion(sampleValue, expectedEcsType)) {
return {
convert: {
field: currentPath,
field: fieldPathToProcessorString(currentPath),
target_field: ecsField.target,
type: getConvertProcessorType(expectedEcsType),
ignore_missing: true,
Expand All @@ -44,17 +62,17 @@ function generateProcessor(
if (ecsField.type === 'date') {
return {
date: {
field: currentPath,
field: fieldPathToProcessorString(currentPath),
target_field: ecsField.target,
formats: convertIfIsoDate(ecsField.date_formats),
if: currentPath.replace(/\./g, '?.'),
if: fieldPathToPainlessExpression(currentPath),
},
};
}

return {
rename: {
field: currentPath,
field: fieldPathToProcessorString(currentPath),
target_field: ecsField.target,
ignore_missing: true,
},
Expand All @@ -74,10 +92,9 @@ function convertIfIsoDate(date: string[]): string[] {
return date;
}

function getSampleValue(key: string, samples: Record<string, any>): unknown {
const keyList = key.split('.');
function getSampleValue(fieldPath: FieldPath, samples: Record<string, any>): unknown {
let value: any = samples;
for (const k of keyList) {
for (const k of fieldPath) {
if (value === undefined || value === null) {
return null;
}
Expand All @@ -91,7 +108,7 @@ function getEcsType(ecsField: ECSField, ecsTypes: Record<string, string>): strin
return ecsTypes[ecsTarget];
}

function getConvertProcessorType(expectedEcsType: string): string {
function getConvertProcessorType(expectedEcsType: KnownESType): KnownESType {
if (expectedEcsType === 'long') {
return 'long';
}
Expand All @@ -107,7 +124,7 @@ function getConvertProcessorType(expectedEcsType: string): string {
return 'string';
}

function needsTypeConversion(sample: unknown, expected: string): boolean {
function needsTypeConversion(sample: unknown, expected: KnownESType): boolean {
if (sample === null || sample === undefined) {
return false;
}
Expand Down Expand Up @@ -136,16 +153,20 @@ function needsTypeConversion(sample: unknown, expected: string): boolean {
return false;
}

function generateProcessors(ecsMapping: object, samples: object, basePath: string = ''): object[] {
function generateProcessors(
ecsMapping: object,
samples: object,
basePath: FieldPath = []
): SafeESProcessorItem[] {
if (Object.keys(ecsMapping).length === 0) {
return [];
}
const ecsTypes = ECS_TYPES;
const valueFieldKeys = new Set(['target', 'confidence', 'date_formats', 'type']);
const results: object[] = [];
const results: SafeESProcessorItem[] = [];

for (const [key, value] of Object.entries(ecsMapping)) {
const currentPath = basePath ? `${basePath}.${key}` : key;
const currentPath = [...basePath, key];

if (value !== null && typeof value === 'object' && value?.target !== null) {
const valueKeys = new Set(Object.keys(value));
Expand All @@ -162,10 +183,11 @@ function generateProcessors(ecsMapping: object, samples: object, basePath: strin
}
}
}

return results;
}

export function createPipeline(state: EcsMappingState): IngestPipeline {
export function createPipeline(state: EcsMappingState): Pipeline {
const samples = JSON.parse(state.combinedSamples);

const processors = generateProcessors(state.finalMapping, samples);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ processors:
- {{ format }}
{% endfor %}
tag: date_processor_{{ value.field}}
if: "ctx.{{ value.if }} != null"{% endif %}
if: |-
{{ value.if }} != null{% endif %}
{% if key == 'convert' %}
- {{ key }}:
field: {{ value.field }}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { fieldPathToProcessorString } from './fields';

describe('fieldPathToProcessorString', () => {
it('should join an array of strings with dots', () => {
const result = fieldPathToProcessorString(['foo', 'bar', 'baz']);
expect(result).toBe('foo.bar.baz');
});

it('should return an empty string if array is empty', () => {
const result = fieldPathToProcessorString([]);
expect(result).toBe('');
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
* Represents a path to a field as an array of string segments.
* Each element in the array represents a level in the field hierarchy.
*
* A segment might contain a character that is invalid in some contexts.
* @example ['person', 'address', 'street-level']
*/
export type FieldPath = string[];

/**
* Converts a FieldPath array into a string useable as the field in the ingest pipeline.
*
* @param fieldPath - The array of field names representing the path.
* @returns The processor string created by joining the field names with a dot.
*/
export function fieldPathToProcessorString(fieldPath: FieldPath): string {
return fieldPath.join('.');
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import {
isPainlessIdentifier,
painlessStringRepresentation,
addPainlessFieldAccess,
fieldPathToPainlessExpression,
type SafePainlessExpression,
} from './painless';

describe('isPainlessIdentifier', () => {
it('should return true for valid identifiers', () => {
expect(isPainlessIdentifier('_validIdentifier123')).toBe(true);
expect(isPainlessIdentifier('valid')).toBe(true);
});

it('should return false for invalid identifiers', () => {
expect(isPainlessIdentifier('123start')).toBe(false); // Identifiers cannot start with a number
expect(isPainlessIdentifier('new')).toBe(true); // Reserved words are valid identifiers
expect(isPainlessIdentifier('_source')).toBe(true); // Underscore-prefixed identifiers are valid
expect(isPainlessIdentifier('invalid-char!')).toBe(false); // Identifiers cannot contain special characters
});
});

describe('painlessFieldEscape', () => {
it('should return a quoted and escaped string', () => {
expect(painlessStringRepresentation('simple')).toBe('"simple"');
expect(painlessStringRepresentation('"quote"')).toBe('"\\"quote\\""');
expect(painlessStringRepresentation('back\\slash')).toBe('"back\\\\slash"');
});
});

describe('addPainlessFieldAccess', () => {
it('should add a dot-access for valid identifiers', () => {
const expr = 'root' as SafePainlessExpression;
const result = addPainlessFieldAccess('foo', expr, false);
expect(result).toBe('root.foo');
});

it('should add a nullable dot-access for valid identifiers', () => {
const expr = 'root' as SafePainlessExpression;
const result = addPainlessFieldAccess('foo', expr);
expect(result).toBe('root?.foo');
});

it('should add a get-access for invalid identifiers', () => {
const expr = 'root' as SafePainlessExpression;
const result = addPainlessFieldAccess('foo-bar', expr, false);
expect(result).toContain('"foo-bar"');
expect(result).toBe('root.get("foo-bar")');
});

it('should add a nullable get-access for invalid identifiers in the chain', () => {
const expr = 'root' as SafePainlessExpression;
const result = addPainlessFieldAccess('foo-bar', expr, true);
expect(result).toContain('"foo-bar"');
expect(result).toBe('root?.get("foo-bar")');
});
});

describe('fieldPathToPainlessExpression', () => {
it('should build a nested expression from a simple field path', () => {
const result = fieldPathToPainlessExpression(['source', 'ip']);
expect(result).toBe('ctx.source?.ip');
});

it('should quote invalid identifiers', () => {
const result = fieldPathToPainlessExpression(['ip-address']);
expect(result).toContain('"ip-address"');
expect(result).toBe('ctx.get("ip-address")');
});

it('should use nullable get access for nested invalid identifiers', () => {
const result = fieldPathToPainlessExpression(['field', 'ip-address']);
expect(result).toContain('"ip-address"');
expect(result).toBe('ctx.field?.get("ip-address")');
});

it('should return just "ctx" if the path is empty', () => {
const result = fieldPathToPainlessExpression([]);
expect(result).toBe('ctx');
});
});
Loading
Loading