diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 53c6a9b5cfb52..d3efa63e408ee 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/constants/binary-detection.ts b/tools/server/webui/src/lib/constants/binary-detection.ts new file mode 100644 index 0000000000000..a4440fde5d493 --- /dev/null +++ b/tools/server/webui/src/lib/constants/binary-detection.ts @@ -0,0 +1,14 @@ +export interface BinaryDetectionOptions { + /** Number of characters to check from the beginning of the file */ + prefixLength: number; + /** Maximum ratio of suspicious characters allowed (0.0 to 1.0) */ + suspiciousCharThresholdRatio: number; + /** Maximum absolute number of null bytes allowed */ + maxAbsoluteNullBytes: number; +} + +export const DEFAULT_BINARY_DETECTION_OPTIONS: BinaryDetectionOptions = { + prefixLength: 1024 * 10, // Check the first 10KB of the string + suspiciousCharThresholdRatio: 0.15, // Allow up to 15% suspicious chars + maxAbsoluteNullBytes: 2 +}; diff --git a/tools/server/webui/src/lib/constants/supported-file-types.ts b/tools/server/webui/src/lib/constants/supported-file-types.ts index f53b55c476ff0..f6c5d2dc18fe1 100644 --- a/tools/server/webui/src/lib/constants/supported-file-types.ts +++ b/tools/server/webui/src/lib/constants/supported-file-types.ts @@ -176,5 +176,13 @@ export const TEXT_FILE_TYPES = { [FileTypeText.SVELTE]: { extensions: [FileExtensionText.SVELTE], mimeTypes: [MimeTypeText.SVELTE] + }, + [FileTypeText.LATEX]: { + extensions: [FileExtensionText.TEX], + mimeTypes: [MimeTypeText.LATEX] + }, + [FileTypeText.BIBTEX]: { + extensions: [FileExtensionText.BIB], + mimeTypes: [MimeTypeText.BIBTEX] } } as const; diff --git a/tools/server/webui/src/lib/enums/files.ts b/tools/server/webui/src/lib/enums/files.ts index 5aec9e7e9f7d3..19b79d32de3c4 100644 --- a/tools/server/webui/src/lib/enums/files.ts +++ 
b/tools/server/webui/src/lib/enums/files.ts @@ -59,7 +59,9 @@ export enum FileTypeText { SWIFT = 'swift', DART = 'dart', VUE = 'vue', - SVELTE = 'svelte' + SVELTE = 'svelte', + LATEX = 'latex', + BIBTEX = 'bibtex' } // File extension enums @@ -115,7 +117,9 @@ export enum FileExtensionText { SWIFT = '.swift', DART = '.dart', VUE = '.vue', - SVELTE = '.svelte' + SVELTE = '.svelte', + TEX = '.tex', + BIB = '.bib' } // MIME type enums @@ -174,5 +178,7 @@ export enum MimeTypeText { SWIFT = 'text/x-swift', DART = 'text/x-dart', VUE = 'text/x-vue', - SVELTE = 'text/x-svelte' + SVELTE = 'text/x-svelte', + LATEX = 'text/x-tex', + BIBTEX = 'text/x-bibtex' } diff --git a/tools/server/webui/src/lib/utils/text-files.ts b/tools/server/webui/src/lib/utils/text-files.ts index 496f5c79e47b3..d882e3c2d39b7 100644 --- a/tools/server/webui/src/lib/utils/text-files.ts +++ b/tools/server/webui/src/lib/utils/text-files.ts @@ -3,6 +3,10 @@ * Handles text file detection, reading, and validation */ +import { + DEFAULT_BINARY_DETECTION_OPTIONS, + type BinaryDetectionOptions +} from '$lib/constants/binary-detection'; import { FileExtensionText } from '$lib/enums/files'; /** @@ -43,41 +47,51 @@ export async function readFileAsText(file: File): Promise<string> { * Heuristic check to determine if content is likely from a text file * Detects binary files by counting suspicious characters and null bytes * @param content - The file content to analyze + * @param options - Optional configuration for detection parameters * @returns True if the content appears to be text-based */ -export function isLikelyTextFile(content: string): boolean { +export function isLikelyTextFile( + content: string, + options: Partial<BinaryDetectionOptions> = {} +): boolean { if (!content) return true; - const sample = content.substring(0, 1000); + const config = { ...DEFAULT_BINARY_DETECTION_OPTIONS, ...options }; + const sample = content.substring(0, config.prefixLength); - let suspiciousCount = 0; let nullCount = 0; + let suspiciousControlCount = 0; 
for (let i = 0; i < sample.length; i++) { const charCode = sample.charCodeAt(i); - // Count null bytes + // Count null bytes - these are strong indicators of binary files if (charCode === 0) { nullCount++; - suspiciousCount++; continue; } - // Count suspicious control characters (excluding common ones like tab, newline, carriage return) + // Count suspicious control characters + // Allow common whitespace characters: tab (9), newline (10), carriage return (13) if (charCode < 32 && charCode !== 9 && charCode !== 10 && charCode !== 13) { - suspiciousCount++; + // Only codes 1-7 and 14-26 count as suspicious; 8, 11, 12 and 27-31 are tolerated + if (charCode < 8 || (charCode > 13 && charCode < 27)) { + suspiciousControlCount++; + } } // Count replacement characters (indicates encoding issues) if (charCode === 0xfffd) { - suspiciousCount++; + suspiciousControlCount++; } } - // Reject if too many null bytes or suspicious characters - if (nullCount > 2) return false; - if (suspiciousCount / sample.length > 0.1) return false; + // Reject if too many null bytes + if (nullCount > config.maxAbsoluteNullBytes) return false; + + // Reject if too many suspicious characters + if (suspiciousControlCount / sample.length > config.suspiciousCharThresholdRatio) return false; return true; }