Skip to content

feat: allow to scan secrets without buffering whole lines #6318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion packages/build/src/plugins_core/secrets_scanning/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const coreStep: CoreStepFunction = async function ({
netlifyConfig,
explicitSecretKeys,
enhancedSecretScan,
featureFlags,
systemLog,
deployId,
api,
Expand All @@ -37,8 +38,9 @@ const coreStep: CoreStepFunction = async function ({

const passedSecretKeys = (explicitSecretKeys || '').split(',')
const envVars = netlifyConfig.build.environment as Record<string, unknown>
const useReadLine = !featureFlags?.secret_scanning_no_readline

systemLog?.({ passedSecretKeys, buildDir })
systemLog?.({ passedSecretKeys, buildDir, useReadLine })

if (!isSecretsScanningEnabled(envVars)) {
logSecretsScanSkipMessage(logs, 'Secrets scanning disabled via SECRETS_SCAN_ENABLED flag set to false.')
Expand Down Expand Up @@ -91,6 +93,7 @@ const coreStep: CoreStepFunction = async function ({
keys: keysToSearchFor,
base: buildDir as string,
filePaths,
useReadLine,
})

secretMatches = scanResults.matches.filter((match) => explicitSecretKeysToScanFor.includes(match.key))
Expand All @@ -103,6 +106,7 @@ const coreStep: CoreStepFunction = async function ({
enhancedSecretsScanMatchesCount: enhancedSecretMatches.length,
secretsFilesCount: scanResults.scannedFilesCount,
keysToSearchFor,
useReadLine,
}

systemLog?.(attributesForLogsAndSpan)
Expand Down
157 changes: 155 additions & 2 deletions packages/build/src/plugins_core/secrets_scanning/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ interface ScanArgs {
keys: string[]
base: string
filePaths: string[]
useReadLine: boolean
}

interface MatchResult {
Expand Down Expand Up @@ -215,7 +216,13 @@ const omitPathMatches = (relativePath, omitPaths) => {
* @param scanArgs {ScanArgs} scan options
* @returns promise with all of the scan results, if any
*/
export async function scanFilesForKeyValues({ env, keys, filePaths, base }: ScanArgs): Promise<ScanResults> {
export async function scanFilesForKeyValues({
env,
keys,
filePaths,
base,
useReadLine,
}: ScanArgs): Promise<ScanResults> {
const scanResults: ScanResults = {
matches: [],
scannedFilesCount: 0,
Expand Down Expand Up @@ -245,6 +252,8 @@ export async function scanFilesForKeyValues({ env, keys, filePaths, base }: Scan

let settledPromises: PromiseSettledResult<MatchResult[]>[] = []

const searchStream = useReadLine ? searchStreamReadline : searchStreamNoReadline

// process the scanning in batches to not run into memory issues by
// processing all files at the same time.
while (filePaths.length > 0) {
Expand All @@ -269,7 +278,14 @@ export async function scanFilesForKeyValues({ env, keys, filePaths, base }: Scan
return scanResults
}

const searchStream = (basePath: string, file: string, keyValues: Record<string, string[]>): Promise<MatchResult[]> => {
/**
* Search stream implementation using node:readline
*/
const searchStreamReadline = (
basePath: string,
file: string,
keyValues: Record<string, string[]>,
): Promise<MatchResult[]> => {
return new Promise((resolve, reject) => {
const filePath = path.resolve(basePath, file)

Expand Down Expand Up @@ -391,6 +407,143 @@ const searchStream = (basePath: string, file: string, keyValues: Record<string,
})
}

/**
 * Search stream implementation that scans raw read-stream chunks instead of going through
 * node:readline, so a file with very long lines (or no newlines at all) never has to be
 * buffered line by line. Only the current chunk plus a short tail of the previous one is
 * kept in memory at any time.
 *
 * @param basePath directory that `file` is resolved against
 * @param file path of the file to scan, relative to `basePath`; reported as-is in each match
 * @param keyValues map of secret key name to the value permutations to look for
 * @returns promise resolving to the matches found; a given value is reported at most once per line.
 *   Resolves with an empty list when `file` is a directory or when there is nothing to search for,
 *   and rejects on any other stream error.
 */
const searchStreamNoReadline = (
  basePath: string,
  file: string,
  keyValues: Record<string, string[]>,
): Promise<MatchResult[]> => {
  return new Promise((resolve, reject) => {
    const matches: MatchResult[] = []

    // Value -> key lookup. When several keys share a value the last key wins.
    // Empty values are skipped: they cannot be a meaningful match and `indexOf('')`
    // never returns -1, which would make the search loop below spin forever.
    const keyByValue = new Map<string, string>()
    for (const [secretKeyName, valuePermutations] of Object.entries(keyValues)) {
      for (const value of valuePermutations) {
        if (value.length > 0) {
          keyByValue.set(value, secretKeyName)
        }
      }
    }

    if (keyByValue.size === 0) {
      // no non-empty values to scan for - settle the promise right away without opening the file
      resolve(matches)
      return
    }

    const values = Array.from(keyByValue.keys())

    let maxValLength = 0
    for (const value of values) {
      maxValLength = Math.max(maxValLength, value.length)
    }
    // A value split across two chunks has at most `length - 1` characters in the earlier chunk,
    // so this is all that has to be carried over between chunks.
    const tailLength = maxValLength - 1

    const filePath = path.resolve(basePath, file)
    // Let the stream decode UTF-8 so multi-byte characters split across chunk boundaries stay intact.
    const inStream = createReadStream(filePath, { encoding: 'utf8' })

    let buffer = ''
    // number of leading characters of `buffer` that were already scanned with the previous chunk
    let scannedLength = 0
    // number of newlines contained in the content that was already dropped from `buffer`
    let processedLines = 0
    const foundLines = new Map<string, Set<number>>()

    function getCurrentBufferNewLineIndexes(): number[] {
      const newLineIndexes: number[] = []
      let newLineIndex = buffer.indexOf('\n')
      while (newLineIndex !== -1) {
        newLineIndexes.push(newLineIndex)
        newLineIndex = buffer.indexOf('\n', newLineIndex + 1)
      }
      return newLineIndexes
    }

    inStream.on('data', function (chunk: string | Buffer) {
      buffer += chunk.toString()

      // computed lazily - only needed once a match has to be mapped to a line number
      let newLineIndexes: number[] | null = null

      for (const value of values) {
        // Occurrences lying entirely inside the carried-over tail were reported with the previous
        // chunk, so start at the first position whose occurrence reaches into the new content.
        const searchStart = Math.max(0, scannedLength - value.length + 1)
        let matchIndex = buffer.indexOf(value, searchStart)

        while (matchIndex !== -1) {
          if (newLineIndexes === null) {
            newLineIndexes = getCurrentBufferNewLineIndexes()
          }

          let lineNumber = processedLines + 1
          for (const newLineIndex of newLineIndexes) {
            if (newLineIndex < matchIndex) {
              lineNumber++
            } else {
              break
            }
          }

          let foundLinesForValue = foundLines.get(value)
          if (!foundLinesForValue) {
            foundLinesForValue = new Set<number>()
            foundLines.set(value, foundLinesForValue)
          }

          // report each value only once per line
          if (!foundLinesForValue.has(lineNumber)) {
            foundLinesForValue.add(lineNumber)
            matches.push({
              file,
              lineNumber,
              key: keyByValue.get(value) ?? '',
            })
          }

          matchIndex = buffer.indexOf(value, matchIndex + 1)
        }
      }

      const dropCount = buffer.length - tailLength
      if (dropCount > 0) {
        // advance processed lines by the newlines that are about to be dropped
        let newLineIndex = buffer.indexOf('\n')
        while (newLineIndex !== -1 && newLineIndex < dropCount) {
          processedLines++
          newLineIndex = buffer.indexOf('\n', newLineIndex + 1)
        }

        // Keep the last part of the buffer to handle values split across chunks
        buffer = buffer.slice(dropCount)
      }

      scannedLength = buffer.length
    })

    inStream.on('error', function (error: Error) {
      if ('code' in error && error.code === 'EISDIR') {
        // file path is a directory - do nothing
        resolve(matches)
      } else {
        reject(error)
      }
    })

    inStream.on('close', function () {
      resolve(matches)
    })
  })
}

/**
* ScanResults are all of the finds for all keys and their disparate locations. Scanning is
* async in streams so order can change a lot. Some matches are the result of an env var explicitly being marked as secret,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { randomBytes } from "node:crypto";
import { createWriteStream, mkdirSync } from "node:fs";

// Fixture generator: writes dist/out.txt as 128MB of random newline-free bytes, followed by the
// value of ENV_SECRET, followed by another 128MB of random newline-free bytes.

const secret = process.env.ENV_SECRET;
if (!secret) {
  // fail early with a clear message instead of an argument-type error from writer.write()
  throw new Error('ENV_SECRET environment variable must be set to generate the fixture output');
}

mkdirSync('dist', { recursive: true });

const writer = createWriteStream('dist/out.txt', { flags: "w" });

/**
 * Writes 128MB of random bytes to `writer` in 1MB chunks. LF and CR bytes are replaced so the
 * written content contains no line breaks. Waits for the stream to drain whenever its internal
 * buffer is full, so the pending data is not all queued in memory at once.
 */
async function writeLotOfBytesWithoutNewLines() {
  const max_size = 128 * 1024 * 1024; // 128MB
  const chunk_size = 1024 * 1024; // 1MB

  let bytes_written = 0;
  while (bytes_written < max_size) {
    const bytes_to_write = Math.min(chunk_size, max_size - bytes_written);
    const buffer = randomBytes(bytes_to_write).map((byte) =>
      // swap LF and CR to something else
      byte === 0x0d || byte === 0x0a ? 0x0b : byte
    );

    // write() returns false once the stream's buffer is full - wait for it to flush
    if (!writer.write(buffer)) {
      await new Promise((resolve) => writer.once('drain', resolve));
    }
    bytes_written += bytes_to_write;
  }
}

await writeLotOfBytesWithoutNewLines()
writer.write(secret)
await writeLotOfBytesWithoutNewLines()

// close the stream and wait until everything has been flushed to disk
await new Promise((resolve, reject) => {
  writer.close(err => {
    if (err) {
      reject(err);
    } else {
      resolve();
    }
  })
})

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[build]
command = 'node generate.mjs'
publish = "./dist"
Loading
Loading