Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified tools/server/public/index.html.gz
Binary file not shown.
14 changes: 14 additions & 0 deletions tools/server/webui/src/lib/constants/binary-detection.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
 * Tunable parameters for the heuristic that separates text content from binary content.
 */
export interface BinaryDetectionOptions {
	/** How many characters, counted from the start of the content, are inspected */
	prefixLength: number;
	/** Largest tolerated share of suspicious characters, expressed as a fraction from 0.0 to 1.0 */
	suspiciousCharThresholdRatio: number;
	/** Largest tolerated absolute count of null bytes */
	maxAbsoluteNullBytes: number;
}

/**
 * Baseline values for {@link BinaryDetectionOptions}.
 */
export const DEFAULT_BINARY_DETECTION_OPTIONS: BinaryDetectionOptions = {
	// Inspect the first 10 * 1024 characters of the content
	prefixLength: 10 * 1024,
	// Tolerate suspicious characters up to 15% of the inspected prefix
	suspiciousCharThresholdRatio: 0.15,
	// Tolerate at most two null bytes
	maxAbsoluteNullBytes: 2
};
8 changes: 8 additions & 0 deletions tools/server/webui/src/lib/constants/supported-file-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,5 +176,13 @@ export const TEXT_FILE_TYPES = {
[FileTypeText.SVELTE]: {
extensions: [FileExtensionText.SVELTE],
mimeTypes: [MimeTypeText.SVELTE]
},
[FileTypeText.LATEX]: {
extensions: [FileExtensionText.TEX],
mimeTypes: [MimeTypeText.LATEX]
},
[FileTypeText.BIBTEX]: {
extensions: [FileExtensionText.BIB],
mimeTypes: [MimeTypeText.BIBTEX]
}
} as const;
12 changes: 9 additions & 3 deletions tools/server/webui/src/lib/enums/files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ export enum FileTypeText {
SWIFT = 'swift',
DART = 'dart',
VUE = 'vue',
SVELTE = 'svelte'
SVELTE = 'svelte',
LATEX = 'latex',
BIBTEX = 'bibtex'
}

// File extension enums
Expand Down Expand Up @@ -115,7 +117,9 @@ export enum FileExtensionText {
SWIFT = '.swift',
DART = '.dart',
VUE = '.vue',
SVELTE = '.svelte'
SVELTE = '.svelte',
TEX = '.tex',
BIB = '.bib'
}

// MIME type enums
Expand Down Expand Up @@ -174,5 +178,7 @@ export enum MimeTypeText {
SWIFT = 'text/x-swift',
DART = 'text/x-dart',
VUE = 'text/x-vue',
SVELTE = 'text/x-svelte'
SVELTE = 'text/x-svelte',
LATEX = 'text/x-tex',
BIBTEX = 'text/x-bibtex'
}
36 changes: 25 additions & 11 deletions tools/server/webui/src/lib/utils/text-files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
* Handles text file detection, reading, and validation
*/

import {
DEFAULT_BINARY_DETECTION_OPTIONS,
type BinaryDetectionOptions
} from '$lib/constants/binary-detection';
import { FileExtensionText } from '$lib/enums/files';

/**
Expand Down Expand Up @@ -43,41 +47,51 @@ export async function readFileAsText(file: File): Promise<string> {
* Heuristic check to determine if content is likely from a text file
* Detects binary files by counting suspicious characters and null bytes
* @param content - The file content to analyze
* @param options - Optional configuration for detection parameters
* @returns True if the content appears to be text-based
*/
export function isLikelyTextFile(
	content: string,
	options: Partial<BinaryDetectionOptions> = {}
): boolean {
	// Empty content offers no evidence of being binary, so it is accepted as text.
	if (!content) return true;

	// Destructuring defaults (rather than object spread) so that an option passed
	// explicitly as `undefined` still falls back to the default instead of
	// overwriting it and silently disabling the corresponding check.
	const {
		prefixLength = DEFAULT_BINARY_DETECTION_OPTIONS.prefixLength,
		suspiciousCharThresholdRatio = DEFAULT_BINARY_DETECTION_OPTIONS.suspiciousCharThresholdRatio,
		maxAbsoluteNullBytes = DEFAULT_BINARY_DETECTION_OPTIONS.maxAbsoluteNullBytes
	} = options;

	// Only the leading part of the content is inspected.
	const sample = content.substring(0, prefixLength);

	// A non-positive prefix length yields an empty sample; accept it explicitly
	// instead of relying on the 0 / 0 (NaN) comparison below.
	if (sample.length === 0) return true;

	let nullCount = 0;
	let suspiciousControlCount = 0;

	for (let i = 0; i < sample.length; i++) {
		const charCode = sample.charCodeAt(i);

		// Null characters are a strong indicator of binary data. They are tracked
		// against their own absolute limit and do not count toward the ratio.
		if (charCode === 0) {
			nullCount++;
			continue;
		}

		// Control codes 1-7 and 14-26 are counted as suspicious. Backspace (8),
		// tab (9), newline (10), vertical tab (11), form feed (12), carriage
		// return (13) and codes 27-31 are tolerated.
		const isSuspiciousControl = charCode < 8 || (charCode > 13 && charCode < 27);

		// U+FFFD (replacement character) indicates encoding issues.
		const isReplacementChar = charCode === 0xfffd;

		if (isSuspiciousControl || isReplacementChar) {
			suspiciousControlCount++;
		}
	}

	// Reject if too many null bytes
	if (nullCount > maxAbsoluteNullBytes) return false;

	// Reject if the share of suspicious characters in the sample exceeds the threshold
	if (suspiciousControlCount / sample.length > suspiciousCharThresholdRatio) return false;

	return true;
}
Loading