diff --git a/README.md b/README.md index 47aeaf6a..c14bea73 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ All it takes is modest JavaScript/TypeScript knowledge and understanding of the - 🤖 AI-Powered Analysis: Automatically extracts and analyzes coding problems using GPT-4o - 💡 Solution Generation: Get detailed explanations and solutions with time/space complexity analysis - 🔧 Real-time Debugging: Debug your code with AI assistance and structured feedback +- 🎙️ Speech Recognition Helper: Record and transcribe interview conversations with AI-powered answer suggestions - 🎨 Advanced Window Management: Freely move, resize, change opacity, and zoom the window - 🔄 Model Selection: Choose between GPT-4o and GPT-4o-mini for different processing stages - 🔒 Privacy-Focused: Your API key and data never leave your computer except for OpenAI API calls @@ -70,6 +71,7 @@ The application uses unidentifiable global keyboard shortcuts that won't be dete - Delete Last Screenshot: [Control or Cmd + L] - Process Screenshots: [Control or Cmd + Enter] - Start New Problem: [Control or Cmd + R] +- Toggle Recording: [Control or Cmd + M] (Speech Recognition Helper) - Quit: [Control or Cmd + Q] - Decrease Opacity: [Control or Cmd + [] - Increase Opacity: [Control or Cmd + ]] @@ -96,7 +98,7 @@ Note: The application is **NOT** invisible to: - Node.js (v16 or higher) - npm or bun package manager -- OpenAI API Key +- OpenAI API Key (required for all AI features including speech recognition) - Screen Recording Permission for Terminal/IDE - On macOS: 1. Go to System Preferences > Security & Privacy > Privacy > Screen Recording @@ -106,6 +108,14 @@ Note: The application is **NOT** invisible to: - No additional permissions needed - On Linux: - May require `xhost` access depending on your distribution +- Microphone Permission (required for Speech Recognition Helper) + - On macOS: + 1. Go to System Preferences > Security & Privacy > Privacy > Microphone + 2. 
Ensure that CodeInterviewAssist has microphone permission enabled + - On Windows: + - Windows will prompt for microphone access when first using the feature + - On Linux: + - May require PulseAudio or ALSA configuration depending on your distribution ## Running the Application @@ -201,6 +211,9 @@ The packaged applications will be available in the `release` directory. | Multi-language Support | ✅ | ✅ | | Time/Space Complexity Analysis | ✅ | ✅ | | Window Management | ✅ | ✅ | +| Speech Recognition | ✅ | ✅ (OpenAI Whisper) | +| AI Answer Suggestions | ✅ | ✅ (Context-aware) | +| Conversation History | ✅ | ✅ | | Auth System | Required | None (Simplified) | | Payment Processing | Required | None (Use your own API key) | | Privacy | Server-processed | 100% Local Processing | @@ -215,7 +228,8 @@ The packaged applications will be available in the `release` directory. - Vite - Tailwind CSS - Radix UI Components -- OpenAI API +- OpenAI API (GPT-4o, GPT-4o-mini, Whisper) +- Web Audio API (for speech recording) ## How It Works @@ -248,13 +262,28 @@ The packaged applications will be available in the `release` directory. - Window remains invisible to specified screen sharing applications - Start a new problem using [Control or Cmd + R] -6. **Language Selection +6. **Language Selection** - Easily switch between programming languages with a single click - Use arrow keys for keyboard navigation through available languages - The system dynamically adapts to any languages added or removed from the codebase - Your language preference is saved between sessions +7. 
**Speech Recognition Helper** + + - Record interview conversations using your microphone with [Control or Cmd + M] + - Automatically transcribe audio to text using OpenAI's Whisper API + - Toggle between "Interviewer" and "You" (Interviewee) speaker modes + - Maintain conversation history with timestamps for both speakers + - Get AI-powered answer suggestions when the interviewer asks questions + - Suggestions are context-aware and consider: + - Previous conversation history + - Your previous answers for consistency + - Screenshot context (if coding problems are captured) + - View real-time transcription and suggestions in the Conversations view + - All audio processing happens locally; only transcription requests are sent to OpenAI + - Supports both coding interviews (with screenshot context) and behavioral interviews + ## Adding More AI Models This application is built with extensibility in mind. You can easily add support for additional LLMs alongside the existing OpenAI integration: @@ -272,10 +301,29 @@ To add new models, simply extend the API integration in `electron/ProcessingHelp - Problem Extraction: Analyzes screenshots to understand the coding problem - Solution Generation: Creates optimized solutions with explanations - Debugging: Provides detailed analysis of errors and improvement suggestions +- **Speech Recognition Model**: Configure the speech-to-text model for transcription: + - Currently supports OpenAI's Whisper-1 model + - Only available when using OpenAI as the API provider + - Configured in Settings > Speech Recognition Model - **Language**: Select your preferred programming language for solutions - **Window Controls**: Adjust opacity, position, and zoom level using keyboard shortcuts - **All settings are stored locally** in your user data directory and persist between sessions +### Speech Recognition Helper Configuration + +The Speech Recognition Helper uses OpenAI's Whisper API for transcription. To use this feature: + +1. 
**API Provider**: Must be set to OpenAI (not Gemini or Anthropic) +2. **Speech Recognition Model**: Select "whisper-1" in Settings +3. **Microphone Access**: Grant microphone permissions when prompted +4. **Usage**: + - Press [Control or Cmd + M] to start/stop recording + - Toggle speaker mode between Interviewer and You (Interviewee) + - View transcribed conversation and AI suggestions in the Conversations view + - Suggestions automatically appear when interviewer questions are detected + +**Note**: Speech recognition requires an active OpenAI API key with sufficient credits. Audio is processed locally and only sent to OpenAI for transcription. Conversation history is stored locally and never transmitted except for transcription requests. + ## License This project is licensed under the GNU Affero General Public License v3.0 (AGPL-3.0). diff --git a/electron/AnswerAssistant.ts b/electron/AnswerAssistant.ts new file mode 100644 index 00000000..95461415 --- /dev/null +++ b/electron/AnswerAssistant.ts @@ -0,0 +1,382 @@ +/** + * AnswerAssistant - Generates AI-powered answer suggestions based on conversation context + * Follows Single Responsibility Principle - only handles answer suggestion generation + * Uses Dependency Inversion Principle - depends on IConversationManager interface + */ +import OpenAI from 'openai'; +import Anthropic from '@anthropic-ai/sdk'; +import * as axios from 'axios'; +import { configHelper, CandidateProfile } from './ConfigHelper'; +import { IConversationManager } from './ConversationManager'; +import { + APIProvider, + DEFAULT_ANSWER_MODELS, +} from "../shared/aiModels"; + +// Interface for Gemini API requests +interface GeminiMessage { + role: string; + parts: Array<{ + text?: string; + }>; +} + +interface GeminiResponse { + candidates: Array<{ + content: { + parts: Array<{ + text: string; + }>; + }; + finishReason: string; + }>; +} + +export interface AnswerSuggestion { + suggestions: string[]; + reasoning: string; +} + +export interface 
IAnswerAssistant { + generateAnswerSuggestions( + currentQuestion: string, + conversationManager: IConversationManager, + screenshotContext?: string + ): Promise; +} + +export class AnswerAssistant implements IAnswerAssistant { + private openai: OpenAI | null = null; + private geminiApiKey: string | null = null; + private anthropic: Anthropic | null = null; + + private formatProviderError(provider: "openai" | "gemini" | "anthropic", error: any, context: string): string { + const status = + typeof error?.status === "number" + ? error.status + : typeof error?.response?.status === "number" + ? error.response.status + : undefined; + const message = error?.message || error?.response?.data?.error?.message || "Unknown error"; + const statusPart = status ? ` (status ${status})` : ""; + return `[${provider}] ${context} failed${statusPart}: ${message}`; + } + + constructor() { + this.initializeAIClients(); + + // Listen for config changes to re-initialize the AI clients + configHelper.on('config-updated', () => { + this.initializeAIClients(); + }); + } + + /** + * Initializes AI clients based on API provider from config + */ + private initializeAIClients(): void { + const config = configHelper.loadConfig(); + + // Reset all clients + this.openai = null; + this.geminiApiKey = null; + this.anthropic = null; + + if (!config.apiKey || config.apiKey.trim().length === 0) { + return; + } + + if (config.apiProvider === "openai") { + this.openai = new OpenAI({ apiKey: config.apiKey }); + } else if (config.apiProvider === "gemini") { + this.geminiApiKey = config.apiKey; + } else if (config.apiProvider === "anthropic") { + this.anthropic = new Anthropic({ apiKey: config.apiKey }); + } + } + + /** + * Generates answer suggestions based on conversation context + * @param currentQuestion - The current interviewer question + * @param conversationManager - Conversation manager instance (dependency injection) + * @param screenshotContext - Optional screenshot context for coding interviews + 
* @returns Promise resolving to answer suggestions + * @throws Error if AI client not initialized or request fails + */ + public async generateAnswerSuggestions( + currentQuestion: string, + conversationManager: IConversationManager, + screenshotContext?: string, + candidateProfile?: CandidateProfile + ): Promise { + const config = configHelper.loadConfig(); + + // Check if any AI client is initialized + if (!this.openai && !this.geminiApiKey && !this.anthropic) { + throw new Error('AI client not initialized. Please set API key in settings.'); + } + + if (!currentQuestion || currentQuestion.trim().length === 0) { + throw new Error('Current question cannot be empty'); + } + + const conversationHistory = conversationManager.getConversationHistory(); + const previousAnswers = conversationManager.getIntervieweeAnswers(); + + // Get candidate profile from config if not provided + const profile = candidateProfile || configHelper.loadConfig().candidateProfile; + + const contextPrompt = this.buildContextPrompt( + currentQuestion, + conversationHistory, + previousAnswers, + screenshotContext, + profile + ); + + const systemMessage = 'You are a helpful interview assistant supporting the candidate for this interview. Tailor suggestions to the job description when provided, and only use resume details when the question is about the candidate\'s background. 
Provide concise, actionable suggestions.'; + + try { + let suggestionsText = ''; + + // Get the configured answer model, fallback to default if not set + const answerModel = config.answerModel || DEFAULT_ANSWER_MODELS[config.apiProvider]; + + if (config.apiProvider === "openai" && this.openai) { + const response = await this.openai.chat.completions.create({ + model: answerModel, + messages: [ + { + role: 'system', + content: systemMessage + }, + { + role: 'user', + content: contextPrompt + } + ], + temperature: 0.7, + max_tokens: 500, + }); + + suggestionsText = response.choices[0]?.message?.content || ''; + } else if (config.apiProvider === "gemini" && this.geminiApiKey) { + const geminiMessages: GeminiMessage[] = [ + { + role: "user", + parts: [ + { + text: `${systemMessage}\n\n${contextPrompt}` + } + ] + } + ]; + + const response = await axios.default.post( + `https://generativelanguage.googleapis.com/v1beta/models/${answerModel}:generateContent?key=${this.geminiApiKey}`, + { + contents: geminiMessages, + generationConfig: { + temperature: 0.7, + maxOutputTokens: 500 + } + } + ); + + const responseData = response.data as GeminiResponse; + if (responseData.candidates && responseData.candidates.length > 0) { + suggestionsText = responseData.candidates[0].content.parts[0].text; + } + } else if (config.apiProvider === "anthropic" && this.anthropic) { + const response = await this.anthropic.messages.create({ + model: answerModel, + max_tokens: 500, + messages: [ + { + role: 'user', + content: `${systemMessage}\n\n${contextPrompt}` + } + ], + temperature: 0.7 + }); + + suggestionsText = (response.content[0] as { type: 'text', text: string }).text; + } else { + throw new Error('No AI client available. Please configure your API key in settings.'); + } + + const suggestions = this.parseSuggestions(suggestionsText); + + return { + suggestions: suggestions.length > 0 + ? 
suggestions + : ['Consider answering based on your experience and background.'], + reasoning: 'Based on conversation history and previous answers', + }; + } catch (error: any) { + console.error('Error generating suggestions:', error); + + // Provide specific error messages based on provider + const status = error?.status ?? error?.response?.status; + if (status === 401) { + throw new Error(this.formatProviderError(config.apiProvider, error, "Auth")); + } else if (status === 429) { + throw new Error(this.formatProviderError(config.apiProvider, error, "Rate limit")); + } + + throw new Error(this.formatProviderError(config.apiProvider, error, "Answer suggestion generation")); + } + } + + /** + * Builds the context prompt for the AI + */ + private buildContextPrompt( + currentQuestion: string, + conversationHistory: string, + previousAnswers: string[], + screenshotContext?: string, + candidateProfile?: CandidateProfile + ): string { + const shouldUseResume = this.isResumeRelevant(currentQuestion); + let prompt = `You are an AI assistant helping someone during an interview. +The interviewer just asked: "${currentQuestion}" + +Previous conversation: +${conversationHistory || 'No previous conversation yet.'} + +Previous answers the interviewee has given: +${previousAnswers.length > 0 ? 
previousAnswers.join('\n\n') : 'No previous answers yet.'} +`; + + if (candidateProfile?.jobDescription) { + prompt += `\n\nJob Description (use to tailor answers to this interview): +${candidateProfile.jobDescription}`; + } + + // Add candidate profile context if available + if (candidateProfile && shouldUseResume) { + const profileSections: string[] = []; + + if (candidateProfile.name) { + profileSections.push(`Name: ${candidateProfile.name}`); + } + + if (candidateProfile.resume) { + profileSections.push(`Resume: ${candidateProfile.resume}`); + } + + if (profileSections.length > 0) { + prompt += `\n\nCandidate Profile (use this to personalize suggestions): +${profileSections.join('\n')}`; + } + } + + prompt += `\n\nBased on the current question and conversation history${shouldUseResume && candidateProfile ? ', and candidate profile (resume only when relevant)' : ''}, provide 3-5 bullet point suggestions that: +1. Directly answer the current question +2. Reference and build upon previous answers for consistency +3. Maintain a coherent narrative +4. 
Are specific and actionable + +Format as simple bullet points, one per line starting with "-".`; + + if (screenshotContext) { + prompt += `\n\nAdditional context from code screenshot: ${screenshotContext}`; + } + + return prompt; + } + + /** + * Only treat resume as relevant when the question is about the candidate's background + */ + private isResumeRelevant(question: string): boolean { + if (!question) return false; + const q = question.toLowerCase(); + const resumeKeywords = [ + 'resume', + 'cv', + 'experience', + 'background', + 'work history', + 'employment', + 'projects', + 'portfolio', + 'skills', + 'education', + 'certification', + 'accomplishment', + 'achievement' + ]; + return resumeKeywords.some(keyword => q.includes(keyword)); + } + + /** + * Parses AI response into structured suggestions + * Handles multi-line suggestions and text after colons (e.g., "Explain that: ...") + */ + private parseSuggestions(suggestionsText: string): string[] { + const lines = suggestionsText.split('\n').map(line => line.trim()); + const suggestions: string[] = []; + let currentSuggestion = ''; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + + // Skip empty lines + if (!line) { + if (currentSuggestion) { + suggestions.push(currentSuggestion.trim()); + currentSuggestion = ''; + } + continue; + } + + // Check if this line starts a new suggestion (bullet point, number, or starts with capital letter after empty line) + const isNewSuggestion = + line.startsWith('-') || + line.startsWith('•') || + line.match(/^\d+\./) || + (i > 0 && !lines[i - 1] && line.length > 0 && line.length < 200); + + if (isNewSuggestion) { + // Save previous suggestion if exists + if (currentSuggestion) { + suggestions.push(currentSuggestion.trim()); + } + // Start new suggestion, removing bullet/number prefix + currentSuggestion = line + .replace(/^[-•]\s*/, '') + .replace(/^\d+\.\s*/, '') + .trim(); + } else if (currentSuggestion) { + // Continue current suggestion (multi-line) + 
currentSuggestion += ' ' + line; + } else if (line.length > 0 && line.length < 200) { + // Standalone line that might be a suggestion + currentSuggestion = line; + } + } + + // Don't forget the last suggestion + if (currentSuggestion) { + suggestions.push(currentSuggestion.trim()); + } + + // Filter out empty or too long suggestions, and clean up + return suggestions + .map(s => s.trim()) + .filter(s => s.length > 0 && s.length < 500) // Increased limit for multi-line suggestions + .map(s => { + // Clean up any extra whitespace + return s.replace(/\s+/g, ' ').trim(); + }); + } + + /** + * Checks if any AI client is initialized + */ + public isInitialized(): boolean { + return this.openai !== null || this.geminiApiKey !== null || this.anthropic !== null; + } +} diff --git a/electron/ConfigHelper.ts b/electron/ConfigHelper.ts index 6d1d2dba..06d77ac8 100644 --- a/electron/ConfigHelper.ts +++ b/electron/ConfigHelper.ts @@ -4,27 +4,50 @@ import path from "node:path" import { app } from "electron" import { EventEmitter } from "events" import { OpenAI } from "openai" +import { + APIProvider, + DEFAULT_PROVIDER, + DEFAULT_MODELS, + sanitizeModelSelection, +} from "../shared/aiModels"; + +export interface CandidateProfile { + name?: string; + resume?: string; // Full resume text + jobDescription?: string; // Target role/job description +} interface Config { apiKey: string; - apiProvider: "openai" | "gemini" | "anthropic"; // Added provider selection + apiProvider: APIProvider; // Added provider selection extractionModel: string; solutionModel: string; debuggingModel: string; + answerModel: string; // Model for AI answer suggestions in conversations + speechRecognitionModel: string; // Speech recognition model (Whisper for OpenAI) language: string; opacity: number; + candidateProfile?: CandidateProfile; // Candidate profile for personalized AI suggestions } export class ConfigHelper extends EventEmitter { private configPath: string; private defaultConfig: Config = { apiKey: 
"", - apiProvider: "gemini", // Default to Gemini - extractionModel: "gemini-2.0-flash", // Default to Flash for faster responses - solutionModel: "gemini-2.0-flash", - debuggingModel: "gemini-2.0-flash", + apiProvider: DEFAULT_PROVIDER, + extractionModel: DEFAULT_MODELS[DEFAULT_PROVIDER].extractionModel, + solutionModel: DEFAULT_MODELS[DEFAULT_PROVIDER].solutionModel, + debuggingModel: DEFAULT_MODELS[DEFAULT_PROVIDER].debuggingModel, + answerModel: DEFAULT_MODELS[DEFAULT_PROVIDER].answerModel, + speechRecognitionModel: + DEFAULT_MODELS.openai.speechRecognitionModel || "whisper-1", language: "python", - opacity: 1.0 + opacity: 1.0, + candidateProfile: { + name: "", + resume: "", + jobDescription: "" + } }; constructor() { @@ -56,38 +79,9 @@ export class ConfigHelper extends EventEmitter { } /** - * Validate and sanitize model selection to ensure only allowed models are used + * Validate and sanitize model selection to ensure only allowed models are used. + * Delegates to shared model configuration for single source of truth. */ - private sanitizeModelSelection(model: string, provider: "openai" | "gemini" | "anthropic"): string { - if (provider === "openai") { - // Only allow gpt-4o and gpt-4o-mini for OpenAI - const allowedModels = ['gpt-4o', 'gpt-4o-mini']; - if (!allowedModels.includes(model)) { - console.warn(`Invalid OpenAI model specified: ${model}. Using default model: gpt-4o`); - return 'gpt-4o'; - } - return model; - } else if (provider === "gemini") { - // Only allow gemini-1.5-pro and gemini-2.0-flash for Gemini - const allowedModels = ['gemini-1.5-pro', 'gemini-2.0-flash']; - if (!allowedModels.includes(model)) { - console.warn(`Invalid Gemini model specified: ${model}. 
Using default model: gemini-2.0-flash`); - return 'gemini-2.0-flash'; // Changed default to flash - } - return model; - } else if (provider === "anthropic") { - // Only allow Claude models - const allowedModels = ['claude-3-7-sonnet-20250219', 'claude-3-5-sonnet-20241022', 'claude-3-opus-20240229']; - if (!allowedModels.includes(model)) { - console.warn(`Invalid Anthropic model specified: ${model}. Using default model: claude-3-7-sonnet-20250219`); - return 'claude-3-7-sonnet-20250219'; - } - return model; - } - // Default fallback - return model; - } - public loadConfig(): Config { try { if (fs.existsSync(this.configPath)) { @@ -96,18 +90,57 @@ export class ConfigHelper extends EventEmitter { // Ensure apiProvider is a valid value if (config.apiProvider !== "openai" && config.apiProvider !== "gemini" && config.apiProvider !== "anthropic") { - config.apiProvider = "gemini"; // Default to Gemini if invalid + config.apiProvider = DEFAULT_PROVIDER; // Default to shared provider if invalid } // Sanitize model selections to ensure only allowed models are used if (config.extractionModel) { - config.extractionModel = this.sanitizeModelSelection(config.extractionModel, config.apiProvider); + config.extractionModel = sanitizeModelSelection( + config.extractionModel, + config.apiProvider, + "extractionModel" + ); } if (config.solutionModel) { - config.solutionModel = this.sanitizeModelSelection(config.solutionModel, config.apiProvider); + config.solutionModel = sanitizeModelSelection( + config.solutionModel, + config.apiProvider, + "solutionModel" + ); } if (config.debuggingModel) { - config.debuggingModel = this.sanitizeModelSelection(config.debuggingModel, config.apiProvider); + config.debuggingModel = sanitizeModelSelection( + config.debuggingModel, + config.apiProvider, + "debuggingModel" + ); + } + if (config.answerModel) { + config.answerModel = sanitizeModelSelection( + config.answerModel, + config.apiProvider, + "answerModel" + ); + } + + // Ensure 
speechRecognitionModel is valid + if (config.speechRecognitionModel) { + if (config.apiProvider === "openai" && config.speechRecognitionModel !== "whisper-1") { + config.speechRecognitionModel = "whisper-1"; + } else if (config.apiProvider === "gemini") { + const allowedGeminiSpeechModels = [ + "gemini-1.5-flash", + "gemini-1.5-pro", + "gemini-3-flash-preview", + "gemini-3-pro-preview", + "gemini-2.0-flash-exp" + ]; + if (!allowedGeminiSpeechModels.includes(config.speechRecognitionModel)) { + config.speechRecognitionModel = DEFAULT_MODELS.gemini.speechRecognitionModel || "gemini-3-flash-preview"; + } + } + } else if (!config.speechRecognitionModel) { + config.speechRecognitionModel = this.defaultConfig.speechRecognitionModel; } return { @@ -148,7 +181,7 @@ export class ConfigHelper extends EventEmitter { public updateConfig(updates: Partial): Config { try { const currentConfig = this.loadConfig(); - let provider = updates.apiProvider || currentConfig.apiProvider; + let provider: APIProvider = updates.apiProvider || currentConfig.apiProvider; // Auto-detect provider based on API key format if a new key is provided if (updates.apiKey && !updates.apiProvider) { @@ -170,30 +203,67 @@ export class ConfigHelper extends EventEmitter { // If provider is changing, reset models to the default for that provider if (updates.apiProvider && updates.apiProvider !== currentConfig.apiProvider) { - if (updates.apiProvider === "openai") { - updates.extractionModel = "gpt-4o"; - updates.solutionModel = "gpt-4o"; - updates.debuggingModel = "gpt-4o"; - } else if (updates.apiProvider === "anthropic") { - updates.extractionModel = "claude-3-7-sonnet-20250219"; - updates.solutionModel = "claude-3-7-sonnet-20250219"; - updates.debuggingModel = "claude-3-7-sonnet-20250219"; - } else { - updates.extractionModel = "gemini-2.0-flash"; - updates.solutionModel = "gemini-2.0-flash"; - updates.debuggingModel = "gemini-2.0-flash"; + const defaults = DEFAULT_MODELS[updates.apiProvider]; + 
updates.extractionModel = defaults.extractionModel; + updates.solutionModel = defaults.solutionModel; + updates.debuggingModel = defaults.debuggingModel; + updates.answerModel = defaults.answerModel; + // Speech recognition supported for OpenAI and Gemini + if (defaults.speechRecognitionModel) { + updates.speechRecognitionModel = defaults.speechRecognitionModel; + } + } + + // Validate speech recognition model + if (updates.speechRecognitionModel) { + if (provider === "openai" && updates.speechRecognitionModel !== "whisper-1") { + console.warn(`Invalid speech recognition model: ${updates.speechRecognitionModel}. Only whisper-1 is supported for OpenAI.`); + updates.speechRecognitionModel = "whisper-1"; + } else if (provider === "gemini") { + // Validate Gemini models that support audio understanding + const allowedGeminiSpeechModels = [ + "gemini-1.5-flash", + "gemini-1.5-pro", + "gemini-3-flash-preview", + "gemini-3-pro-preview", + "gemini-2.0-flash-exp" + ]; + if (!allowedGeminiSpeechModels.includes(updates.speechRecognitionModel)) { + const defaultModel = DEFAULT_MODELS[provider].speechRecognitionModel || "gemini-3-flash-preview"; + console.warn(`Invalid Gemini speech recognition model: ${updates.speechRecognitionModel}. 
Using default: ${defaultModel}`); + updates.speechRecognitionModel = defaultModel; + } } } // Sanitize model selections in the updates if (updates.extractionModel) { - updates.extractionModel = this.sanitizeModelSelection(updates.extractionModel, provider); + updates.extractionModel = sanitizeModelSelection( + updates.extractionModel, + provider, + "extractionModel" + ); } if (updates.solutionModel) { - updates.solutionModel = this.sanitizeModelSelection(updates.solutionModel, provider); + updates.solutionModel = sanitizeModelSelection( + updates.solutionModel, + provider, + "solutionModel" + ); } if (updates.debuggingModel) { - updates.debuggingModel = this.sanitizeModelSelection(updates.debuggingModel, provider); + updates.debuggingModel = sanitizeModelSelection( + updates.debuggingModel, + provider, + "debuggingModel" + ); + } + if (updates.answerModel) { + updates.answerModel = sanitizeModelSelection( + updates.answerModel, + provider, + "answerModel" + ); } const newConfig = { ...currentConfig, ...updates }; @@ -203,7 +273,9 @@ export class ConfigHelper extends EventEmitter { // This prevents re-initializing the AI client when only opacity changes if (updates.apiKey !== undefined || updates.apiProvider !== undefined || updates.extractionModel !== undefined || updates.solutionModel !== undefined || - updates.debuggingModel !== undefined || updates.language !== undefined) { + updates.debuggingModel !== undefined || updates.answerModel !== undefined || + updates.speechRecognitionModel !== undefined || + updates.language !== undefined) { this.emit('config-updated', newConfig); } diff --git a/electron/ConversationManager.ts b/electron/ConversationManager.ts new file mode 100644 index 00000000..4df0319c --- /dev/null +++ b/electron/ConversationManager.ts @@ -0,0 +1,152 @@ +/** + * ConversationManager - Manages conversation state and messages + * Follows Single Responsibility Principle - only handles conversation state + * Uses EventEmitter for loose coupling 
(Observer pattern) + */ +import { EventEmitter } from 'events'; + +export interface ConversationMessage { + id: string; + speaker: 'interviewer' | 'interviewee'; + text: string; + timestamp: number; + edited?: boolean; +} + +export interface IConversationManager { + addMessage(text: string, speaker?: 'interviewer' | 'interviewee'): ConversationMessage; + toggleSpeaker(): 'interviewer' | 'interviewee'; + getCurrentSpeaker(): 'interviewer' | 'interviewee'; + getMessages(): ConversationMessage[]; + getConversationHistory(): string; + getIntervieweeAnswers(): string[]; + updateMessage(messageId: string, newText: string): boolean; + clearConversation(): void; + setSpeaker(speaker: 'interviewer' | 'interviewee'): void; +} + +export class ConversationManager extends EventEmitter implements IConversationManager { + private messages: ConversationMessage[] = []; + private currentSpeaker: 'interviewer' | 'interviewee' = 'interviewee'; + + /** + * Adds a new message to the conversation + * @param text - Message text + * @param speaker - Optional speaker override, uses current speaker if not provided + * @returns The created message + */ + public addMessage( + text: string, + speaker?: 'interviewer' | 'interviewee' + ): ConversationMessage { + if (!text || text.trim().length === 0) { + throw new Error('Message text cannot be empty'); + } + + const message: ConversationMessage = { + id: this.generateMessageId(), + speaker: speaker || this.currentSpeaker, + text: text.trim(), + timestamp: Date.now(), + }; + + this.messages.push(message); + this.emit('message-added', message); + return message; + } + + /** + * Toggles between interviewer and interviewee speaker modes + * @returns The new speaker mode + */ + public toggleSpeaker(): 'interviewer' | 'interviewee' { + this.currentSpeaker = this.currentSpeaker === 'interviewer' + ? 
'interviewee' + : 'interviewer'; + this.emit('speaker-changed', this.currentSpeaker); + return this.currentSpeaker; + } + + /** + * Sets the current speaker mode + * @param speaker - Speaker mode to set + */ + public setSpeaker(speaker: 'interviewer' | 'interviewee'): void { + if (this.currentSpeaker !== speaker) { + this.currentSpeaker = speaker; + this.emit('speaker-changed', this.currentSpeaker); + } + } + + /** + * Gets the current speaker mode + */ + public getCurrentSpeaker(): 'interviewer' | 'interviewee' { + return this.currentSpeaker; + } + + /** + * Gets all messages in the conversation + * @returns Copy of messages array (immutable) + */ + public getMessages(): ConversationMessage[] { + return [...this.messages]; + } + + /** + * Gets conversation history as formatted string + * @returns Formatted conversation history + */ + public getConversationHistory(): string { + return this.messages + .map(msg => `[${msg.speaker === 'interviewer' ? 'Interviewer' : 'You'}] ${msg.text}`) + .join('\n\n'); + } + + /** + * Gets all answers from the interviewee + * @returns Array of interviewee answer texts + */ + public getIntervieweeAnswers(): string[] { + return this.messages + .filter(msg => msg.speaker === 'interviewee') + .map(msg => msg.text); + } + + /** + * Updates an existing message + * @param messageId - ID of message to update + * @param newText - New text for the message + * @returns True if message was found and updated, false otherwise + */ + public updateMessage(messageId: string, newText: string): boolean { + if (!newText || newText.trim().length === 0) { + return false; + } + + const message = this.messages.find(m => m.id === messageId); + if (message) { + message.text = newText.trim(); + message.edited = true; + this.emit('message-updated', message); + return true; + } + return false; + } + + /** + * Clears all messages and resets to default speaker + */ + public clearConversation(): void { + this.messages = []; + this.currentSpeaker = 'interviewee'; + 
this.emit('conversation-cleared'); + } + + /** + * Generates a unique message ID + */ + private generateMessageId(): string { + return `msg-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`; + } +} diff --git a/electron/ProcessingHelper.ts b/electron/ProcessingHelper.ts index 0dcd26f0..7d13bb88 100644 --- a/electron/ProcessingHelper.ts +++ b/electron/ProcessingHelper.ts @@ -8,6 +8,10 @@ import { app, BrowserWindow, dialog } from "electron" import { OpenAI } from "openai" import { configHelper } from "./ConfigHelper" import Anthropic from '@anthropic-ai/sdk'; +import { + APIProvider, + DEFAULT_MODELS, +} from "../shared/aiModels"; // Interface for Gemini API requests interface GeminiMessage { @@ -53,6 +57,18 @@ export class ProcessingHelper { // AbortControllers for API requests private currentProcessingAbortController: AbortController | null = null private currentExtraProcessingAbortController: AbortController | null = null + + private formatProviderError(provider: "openai" | "gemini" | "anthropic", error: any, context: string): string { + const status = + typeof error?.status === "number" + ? error.status + : typeof error?.response?.status === "number" + ? error.response.status + : undefined; + const message = error?.message || error?.response?.data?.error?.message || "Unknown error"; + const statusPart = status ? ` (status ${status})` : ""; + return `[${provider}] ${context} failed${statusPart}: ${message}`; + } constructor(deps: IProcessingHelperDeps) { this.deps = deps @@ -66,6 +82,22 @@ export class ProcessingHelper { this.initializeAIClient(); }); } + + /** + * Get conversation context for integration with screenshot processing + */ + private getConversationContext(): string | null { + try { + const conversationManager = this.deps.getConversationManager?.(); + if (conversationManager) { + const history = conversationManager.getConversationHistory(); + return history && history.trim().length > 0 ? 
history : null; + } + } catch (error) { + console.error('Error getting conversation context:', error); + } + return null; + } /** * Initialize or reinitialize the AI client with current config @@ -473,18 +505,29 @@ export class ProcessingHelper { } } + // Get conversation context if available + const conversationContext = this.getConversationContext(); + // Use OpenAI for processing + const systemPrompt = conversationContext + ? `You are a coding challenge interpreter. Analyze the screenshot of the coding problem and extract all relevant information. Consider the conversation context provided. Return the information in JSON format with these fields: problem_statement, constraints, example_input, example_output. Just return the structured JSON without any other text.` + : "You are a coding challenge interpreter. Analyze the screenshot of the coding problem and extract all relevant information. Return the information in JSON format with these fields: problem_statement, constraints, example_input, example_output. Just return the structured JSON without any other text."; + + const userPrompt = conversationContext + ? `Extract the coding problem details from these screenshots. Consider the following conversation context:\n\n${conversationContext}\n\nReturn in JSON format. Preferred coding language we gonna use for this problem is ${language}.` + : `Extract the coding problem details from these screenshots. Return in JSON format. Preferred coding language we gonna use for this problem is ${language}.`; + const messages = [ { role: "system" as const, - content: "You are a coding challenge interpreter. Analyze the screenshot of the coding problem and extract all relevant information. Return the information in JSON format with these fields: problem_statement, constraints, example_input, example_output. Just return the structured JSON without any other text." 
+ content: systemPrompt }, { role: "user" as const, content: [ { type: "text" as const, - text: `Extract the coding problem details from these screenshots. Return in JSON format. Preferred coding language we gonna use for this problem is ${language}.` + text: userPrompt }, ...imageDataList.map(data => ({ type: "image_url" as const, @@ -525,13 +568,20 @@ export class ProcessingHelper { } try { + // Get conversation context if available + const conversationContext = this.getConversationContext(); + + const geminiPrompt = conversationContext + ? `You are a coding challenge interpreter. Analyze the screenshots of the coding problem and extract all relevant information. Consider the following conversation context:\n\n${conversationContext}\n\nReturn the information in JSON format with these fields: problem_statement, constraints, example_input, example_output. Just return the structured JSON without any other text. Preferred coding language we gonna use for this problem is ${language}.` + : `You are a coding challenge interpreter. Analyze the screenshots of the coding problem and extract all relevant information. Return the information in JSON format with these fields: problem_statement, constraints, example_input, example_output. Just return the structured JSON without any other text. Preferred coding language we gonna use for this problem is ${language}.`; + // Create Gemini message structure const geminiMessages: GeminiMessage[] = [ { role: "user", parts: [ { - text: `You are a coding challenge interpreter. Analyze the screenshots of the coding problem and extract all relevant information. Return the information in JSON format with these fields: problem_statement, constraints, example_input, example_output. Just return the structured JSON without any other text. 
Preferred coding language we gonna use for this problem is ${language}.` + text: geminiPrompt }, ...imageDataList.map(data => ({ inlineData: { @@ -545,7 +595,7 @@ export class ProcessingHelper { // Make API request to Gemini const response = await axios.default.post( - `https://generativelanguage.googleapis.com/v1beta/models/${config.extractionModel || "gemini-2.0-flash"}:generateContent?key=${this.geminiApiKey}`, + `https://generativelanguage.googleapis.com/v1beta/models/${config.extractionModel || "gemini-3-flash-latest"}:generateContent?key=${this.geminiApiKey}`, { contents: geminiMessages, generationConfig: { @@ -571,7 +621,7 @@ export class ProcessingHelper { console.error("Error using Gemini API:", error); return { success: false, - error: "Failed to process with Gemini API. Please check your API key or try again later." + error: this.formatProviderError("gemini", error, "Problem extraction") }; } } else if (config.apiProvider === "anthropic") { @@ -583,13 +633,20 @@ export class ProcessingHelper { } try { + // Get conversation context if available + const conversationContext = this.getConversationContext(); + + const anthropicPrompt = conversationContext + ? `Extract the coding problem details from these screenshots. Consider the following conversation context:\n\n${conversationContext}\n\nReturn in JSON format with these fields: problem_statement, constraints, example_input, example_output. Preferred coding language is ${language}.` + : `Extract the coding problem details from these screenshots. Return in JSON format with these fields: problem_statement, constraints, example_input, example_output. Preferred coding language is ${language}.`; + const messages = [ { role: "user" as const, content: [ { type: "text" as const, - text: `Extract the coding problem details from these screenshots. Return in JSON format with these fields: problem_statement, constraints, example_input, example_output. 
Preferred coding language is ${language}.` + text: anthropicPrompt }, ...imageDataList.map(data => ({ type: "image" as const, @@ -631,7 +688,7 @@ export class ProcessingHelper { return { success: false, - error: "Failed to process with Anthropic API. Please check your API key or try again later." + error: this.formatProviderError("anthropic", error, "Problem extraction") }; } } @@ -688,28 +745,31 @@ export class ProcessingHelper { }; } + const config = configHelper.loadConfig(); + const provider: APIProvider = config.apiProvider; + // Handle OpenAI API errors specifically if (error?.response?.status === 401) { return { success: false, - error: "Invalid OpenAI API key. Please check your settings." + error: this.formatProviderError(provider, error, "Auth") }; } else if (error?.response?.status === 429) { return { success: false, - error: "OpenAI API rate limit exceeded or insufficient credits. Please try again later." + error: this.formatProviderError(provider, error, "Rate limit / quota") }; } else if (error?.response?.status === 500) { return { success: false, - error: "OpenAI server error. Please try again later." + error: this.formatProviderError(provider, error, "Server error") }; } console.error("API Error Details:", error); return { success: false, - error: error.message || "Failed to process screenshots. Please try again." + error: this.formatProviderError(provider, error, "Processing screenshots") }; } } @@ -809,7 +869,7 @@ Your solution should be efficient, well-commented, and handle edge cases. 
// Make API request to Gemini const response = await axios.default.post( - `https://generativelanguage.googleapis.com/v1beta/models/${config.solutionModel || "gemini-2.0-flash"}:generateContent?key=${this.geminiApiKey}`, + `https://generativelanguage.googleapis.com/v1beta/models/${config.solutionModel || "gemini-3-flash-latest"}:generateContent?key=${this.geminiApiKey}`, { contents: geminiMessages, generationConfig: { @@ -831,7 +891,7 @@ Your solution should be efficient, well-commented, and handle edge cases. console.error("Error using Gemini API for solution:", error); return { success: false, - error: "Failed to generate solution with Gemini API. Please check your API key or try again later." + error: this.formatProviderError("gemini", error, "Solution generation") }; } } else if (config.apiProvider === "anthropic") { @@ -883,7 +943,7 @@ Your solution should be efficient, well-commented, and handle edge cases. return { success: false, - error: "Failed to generate solution with Anthropic API. Please check your API key or try again later." + error: this.formatProviderError("anthropic", error, "Solution generation") }; } } @@ -968,17 +1028,17 @@ Your solution should be efficient, well-commented, and handle edge cases. if (error?.response?.status === 401) { return { success: false, - error: "Invalid OpenAI API key. Please check your settings." + error: this.formatProviderError(configHelper.loadConfig().apiProvider, error, "Auth") }; } else if (error?.response?.status === 429) { return { success: false, - error: "OpenAI API rate limit exceeded or insufficient credits. Please try again later." 
+ error: this.formatProviderError(configHelper.loadConfig().apiProvider, error, "Rate limit / quota") }; } console.error("Solution generation error:", error); - return { success: false, error: error.message || "Failed to generate solution" }; + return { success: false, error: this.formatProviderError(configHelper.loadConfig().apiProvider, error, "Solution generation") }; } } @@ -1130,7 +1190,7 @@ If you include code examples, use proper markdown code blocks with language spec } const response = await axios.default.post( - `https://generativelanguage.googleapis.com/v1beta/models/${config.debuggingModel || "gemini-2.0-flash"}:generateContent?key=${this.geminiApiKey}`, + `https://generativelanguage.googleapis.com/v1beta/models/${config.debuggingModel || "gemini-3-flash-latest"}:generateContent?key=${this.geminiApiKey}`, { contents: geminiMessages, generationConfig: { @@ -1152,7 +1212,7 @@ If you include code examples, use proper markdown code blocks with language spec console.error("Error using Gemini API for debugging:", error); return { success: false, - error: "Failed to process debug request with Gemini API. Please check your API key or try again later." + error: this.formatProviderError("gemini", error, "Debugging") }; } } else if (config.apiProvider === "anthropic") { @@ -1241,7 +1301,7 @@ If you include code examples, use proper markdown code blocks with language spec return { success: false, - error: "Failed to process debug request with Anthropic API. Please check your API key or try again later." 
+ error: this.formatProviderError("anthropic", error, "Debugging") }; } } @@ -1286,7 +1346,7 @@ If you include code examples, use proper markdown code blocks with language spec return { success: true, data: response }; } catch (error: any) { console.error("Debug processing error:", error); - return { success: false, error: error.message || "Failed to process debug request" }; + return { success: false, error: this.formatProviderError(configHelper.loadConfig().apiProvider, error, "Debug processing") }; } } diff --git a/electron/TranscriptionHelper.ts b/electron/TranscriptionHelper.ts new file mode 100644 index 00000000..a61d0aa7 --- /dev/null +++ b/electron/TranscriptionHelper.ts @@ -0,0 +1,341 @@ +/** + * TranscriptionHelper - Handles audio transcription using various AI providers + * Follows Single Responsibility Principle - only handles transcription + * Supports multiple providers: OpenAI (Whisper), Gemini (Audio Understanding), Anthropic (future) + */ +import OpenAI from 'openai'; +import Anthropic from '@anthropic-ai/sdk'; +import * as axios from 'axios'; +import fs from 'fs'; +import path from 'path'; +import { app } from 'electron'; +import { configHelper } from './ConfigHelper'; + +export interface TranscriptionResult { + text: string; + language?: string; +} + +export interface ITranscriptionHelper { + transcribeAudio(audioBuffer: Buffer, mimeType?: string): Promise; +} + +export class TranscriptionHelper implements ITranscriptionHelper { + private openai: OpenAI | null = null; + private geminiApiKey: string | null = null; + private anthropic: Anthropic | null = null; + private readonly tempDir: string; + + // Default models for each provider + private readonly defaultOpenAIModel: string = 'whisper-1'; + private readonly defaultGeminiModel: string = 'gemini-3-flash-preview'; // Gemini model with audio understanding support + private readonly defaultAnthropicModel: string = ''; // To be set when Anthropic speech recognition is available + + constructor() { 
+ this.tempDir = path.join(app.getPath('temp'), 'audio-transcriptions'); + this.ensureTempDirectory(); + this.initializeAIClients(); + + // Listen for config changes to re-initialize + configHelper.on('config-updated', () => { + this.initializeAIClients(); + }); + } + + /** + * Initializes AI clients based on API provider from config + * Supports OpenAI (Whisper) and Gemini (Audio Understanding) + */ + private initializeAIClients(): void { + const config = configHelper.loadConfig(); + + // Reset all clients + this.openai = null; + this.geminiApiKey = null; + this.anthropic = null; + + if (!config.apiKey || config.apiKey.trim().length === 0) { + return; + } + + if (config.apiProvider === "openai") { + this.openai = new OpenAI({ apiKey: config.apiKey }); + console.log("OpenAI transcription client initialized"); + } else if (config.apiProvider === "gemini") { + this.geminiApiKey = config.apiKey; + console.log("Gemini API key set for audio understanding"); + } else if (config.apiProvider === "anthropic") { + // Future: Initialize Anthropic client when speech recognition is available + this.anthropic = new Anthropic({ apiKey: config.apiKey }); + console.log("Anthropic client initialized (speech recognition not yet available)"); + } + } + + /** + * Checks if the current provider supports speech recognition + */ + private isSpeechRecognitionSupported(provider: "openai" | "gemini" | "anthropic"): boolean { + // OpenAI (Whisper) and Gemini (Audio Understanding) support speech recognition + return provider === "openai" || provider === "gemini"; + } + + private formatProviderError(provider: "openai" | "gemini" | "anthropic", error: any, context: string): string { + const status = + typeof error?.status === "number" + ? error.status + : typeof error?.response?.status === "number" + ? error.response.status + : undefined; + const message = error?.message || error?.response?.data?.error?.message || "Unknown error"; + const statusPart = status ? 
` (status ${status})` : ""; + return `[${provider}] ${context} failed${statusPart}: ${message}`; + } + + /** + * Ensures temp directory exists for audio files + */ + private ensureTempDirectory(): void { + if (!fs.existsSync(this.tempDir)) { + fs.mkdirSync(this.tempDir, { recursive: true }); + } + } + + /** + * Transcribes audio buffer using the configured AI provider + * @param audioBuffer - Audio data as Buffer + * @param mimeType - MIME type of the audio (default: 'audio/webm') + * @returns Promise resolving to transcription result + * @throws Error if transcription fails or AI client not initialized + */ + public async transcribeAudio( + audioBuffer: Buffer, + mimeType: string = 'audio/webm' + ): Promise { + const config = configHelper.loadConfig(); + + // Check if speech recognition is supported for the current provider + if (!this.isSpeechRecognitionSupported(config.apiProvider)) { + throw new Error(`Speech recognition is currently only supported with OpenAI or Gemini providers. Please switch to one of these providers in settings.`); + } + + if (!audioBuffer || audioBuffer.length === 0) { + throw new Error('Audio buffer is empty'); + } + + // Route to the appropriate provider's transcription method + if (config.apiProvider === "openai") { + return this.transcribeWithOpenAI(audioBuffer, mimeType); + } else if (config.apiProvider === "gemini") { + return this.transcribeWithGemini(audioBuffer, mimeType); + } else if (config.apiProvider === "anthropic") { + return this.transcribeWithAnthropic(audioBuffer, mimeType); + } else { + throw new Error(`Unsupported API provider: ${config.apiProvider}`); + } + } + + /** + * Transcribes audio using OpenAI Whisper API + */ + private async transcribeWithOpenAI( + audioBuffer: Buffer, + mimeType: string + ): Promise { + if (!this.openai) { + throw new Error('OpenAI client not initialized. 
Please set OpenAI API key in settings.'); + } + + const tempPath = path.join(this.tempDir, `audio-${Date.now()}-${Math.random().toString(36).substring(7)}.webm`); + + try { + // Write buffer to temp file + fs.writeFileSync(tempPath, audioBuffer); + + // Create read stream for OpenAI API + const file = fs.createReadStream(tempPath); + + // Get speech recognition model from config + const config = configHelper.loadConfig(); + const speechModel = config.speechRecognitionModel || this.defaultOpenAIModel; + + // Transcribe using Whisper API + const transcription = await this.openai.audio.transcriptions.create({ + file: file, + model: speechModel, + language: 'en', // Optional: can be auto-detected + response_format: 'verbose_json', + }); + + // Clean up temp file + this.cleanupTempFile(tempPath); + + return { + text: transcription.text, + language: transcription.language, + }; + } catch (error: any) { + // Clean up on error + this.cleanupTempFile(tempPath); + + console.error('OpenAI transcription error:', error); + + // Provide more specific error messages + const status = error?.status ?? error?.response?.status; + if (status === 401) { + throw new Error(this.formatProviderError("openai", error, "Auth")); + } else if (status === 429) { + throw new Error(this.formatProviderError("openai", error, "Rate limit")); + } else if (error.message?.includes('file')) { + throw new Error(this.formatProviderError("openai", error, "Invalid audio file")); + } + + throw new Error(this.formatProviderError("openai", error, "Transcription")); + } + } + + /** + * Transcribes audio using Gemini API Audio Understanding + * Uses Gemini's multimodal capabilities to transcribe audio to text + */ + private async transcribeWithGemini( + audioBuffer: Buffer, + mimeType: string + ): Promise { + if (!this.geminiApiKey) { + throw new Error('Gemini API key not initialized. 
Please set Gemini API key in settings.');
+    }
+
+    // Get speech recognition model from config
+    const config = configHelper.loadConfig();
+    const speechModel = config.speechRecognitionModel || this.defaultGeminiModel;
+
+    try {
+      // Convert audio buffer to base64
+      const audioBase64 = audioBuffer.toString('base64');
+
+      // Normalize MIME type for Gemini API
+      // Gemini supports: audio/mpeg, audio/mp3, audio/wav, audio/flac, audio/webm, audio/m4a, audio/ogg
+      let normalizedMimeType = mimeType;
+      // 'audio/webm' (the recorder's default) is already in Gemini's supported
+      // form, so it deliberately falls through below without remapping.
+      if (mimeType.includes('mp3') || mimeType.includes('mpeg')) {
+        normalizedMimeType = 'audio/mpeg';
+      } else if (mimeType.includes('wav')) {
+        normalizedMimeType = 'audio/wav';
+      } else if (mimeType.includes('flac')) {
+        normalizedMimeType = 'audio/flac';
+      } else if (mimeType.includes('m4a')) {
+        normalizedMimeType = 'audio/m4a';
+      } else if (mimeType.includes('ogg')) {
+        normalizedMimeType = 'audio/ogg';
+      }
+
+      // Create Gemini message with audio data and transcription prompt
+      const geminiMessages = [
+        {
+          role: "user",
+          parts: [
+            {
+              text: "Please transcribe this audio to text. Return only the transcribed text without any additional commentary."
+            },
+            {
+              inlineData: {
+                mimeType: normalizedMimeType,
+                data: audioBase64
+              }
+            }
+          ]
+        }
+      ];
+
+      // Make API request to Gemini
+      const response = await axios.default.post(
+        `https://generativelanguage.googleapis.com/v1beta/models/${speechModel}:generateContent?key=${this.geminiApiKey}`,
+        {
+          contents: geminiMessages,
+          generationConfig: {
+            temperature: 0.1, // Low temperature for accurate transcription
+            maxOutputTokens: 4096
+          }
+        }
+      );
+
+      const responseData = response.data;
+
+      // Extract transcription text from response
+      if (!responseData.candidates || responseData.candidates.length === 0) {
+        throw new Error("Empty response from Gemini API");
+      }
+
+      const rawText = responseData.candidates[0].content.parts[0].text;
+
+      // Gemini has no structured language field (unlike Whisper's verbose_json);
+      // opportunistically read an optional "[Language: xx]" tag and strip it from the text.
+      const language = rawText.match(/\[Language: (\w+)\]/)?.[1];
+      const transcriptionText = rawText.replace(/\[Language: \w+\]/, '');
+      return {
+        text: transcriptionText.trim(),
+        language: language || undefined,
+      };
+    } catch (error: any) {
+      console.error('Gemini transcription error:', error);
+
+      // Provide more specific error messages
+      const status = error?.status ??
error?.response?.status; + if (status === 401) { + throw new Error(this.formatProviderError("gemini", error, "Auth")); + } else if (status === 429) { + throw new Error(this.formatProviderError("gemini", error, "Rate limit")); + } else if (status === 400) { + throw new Error(this.formatProviderError("gemini", error, "Invalid audio file or request")); + } + + throw new Error(this.formatProviderError("gemini", error, "Transcription")); + } + } + + /** + * Transcribes audio using Anthropic API (Future implementation) + * TODO: Implement when Anthropic speech recognition becomes available + */ + private async transcribeWithAnthropic( + audioBuffer: Buffer, + mimeType: string + ): Promise { + if (!this.anthropic) { + throw new Error('Anthropic client not initialized. Please set Anthropic API key in settings.'); + } + + // TODO: Implement Anthropic speech recognition when available + throw new Error('Anthropic speech recognition is not yet available. Please use OpenAI provider for transcription.'); + } + + /** + * Safely removes temporary file + */ + private cleanupTempFile(filePath: string): void { + try { + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + } catch (error) { + console.error('Error cleaning up temp file:', error); + // Don't throw - cleanup errors shouldn't break the flow + } + } + + /** + * Checks if any AI client is initialized + */ + public isInitialized(): boolean { + return this.openai !== null || this.geminiApiKey !== null || this.anthropic !== null; + } + + /** + * Checks if speech recognition is available for the current provider + */ + public isSpeechRecognitionAvailable(): boolean { + const config = configHelper.loadConfig(); + return this.isSpeechRecognitionSupported(config.apiProvider) && this.isInitialized(); + } +} diff --git a/electron/ipcHandlers.ts b/electron/ipcHandlers.ts index f05a9aee..b2ce8bd1 100644 --- a/electron/ipcHandlers.ts +++ b/electron/ipcHandlers.ts @@ -348,4 +348,146 @@ export function 
initializeIpcHandlers(deps: IIpcHandlerDeps): void { return { success: false, error: "Failed to delete last screenshot" } } }) + + // ============================================ + // Conversation & Transcription Handlers + // ============================================ + + // Transcription handler - receives audio buffer from renderer + ipcMain.handle("transcribe-audio", async (_event, audioBuffer: ArrayBuffer, mimeType: string) => { + try { + if (!deps.transcriptionHelper) { + return { success: false, error: "Transcription helper not initialized" }; + } + + const buffer = Buffer.from(audioBuffer); + const result = await deps.transcriptionHelper.transcribeAudio(buffer, mimeType); + return { success: true, result }; + } catch (error: any) { + console.error("Transcription error:", error); + return { success: false, error: error.message || "Transcription failed" }; + } + }) + + // Conversation message handlers + ipcMain.handle("add-conversation-message", (_event, text: string, speaker?: string) => { + try { + if (!deps.conversationManager) { + return { success: false, error: "Conversation manager not initialized" }; + } + + const message = deps.conversationManager.addMessage(text, speaker as any); + return { success: true, message }; + } catch (error: any) { + console.error("Error adding message:", error); + return { success: false, error: error.message || "Failed to add message" }; + } + }) + + ipcMain.handle("toggle-speaker", () => { + try { + if (!deps.conversationManager) { + return { success: false, error: "Conversation manager not initialized" }; + } + + const speaker = deps.conversationManager.toggleSpeaker(); + return { success: true, speaker }; + } catch (error: any) { + console.error("Error toggling speaker:", error); + return { success: false, error: error.message || "Failed to toggle speaker" }; + } + }) + + ipcMain.handle("get-conversation", () => { + try { + if (!deps.conversationManager) { + return { success: false, error: "Conversation manager not 
initialized", messages: [] }; + } + + const messages = deps.conversationManager.getMessages(); + return { success: true, messages }; + } catch (error: any) { + console.error("Error getting conversation:", error); + return { success: false, error: error.message || "Failed to get conversation", messages: [] }; + } + }) + + ipcMain.handle("clear-conversation", () => { + try { + if (!deps.conversationManager) { + return { success: false, error: "Conversation manager not initialized" }; + } + + deps.conversationManager.clearConversation(); + return { success: true }; + } catch (error: any) { + console.error("Error clearing conversation:", error); + return { success: false, error: error.message || "Failed to clear conversation" }; + } + }) + + ipcMain.handle("update-conversation-message", (_event, messageId: string, newText: string) => { + try { + if (!deps.conversationManager) { + return { success: false, error: "Conversation manager not initialized" }; + } + + const success = deps.conversationManager.updateMessage(messageId, newText); + return { success }; + } catch (error: any) { + console.error("Error updating message:", error); + return { success: false, error: error.message || "Failed to update message" }; + } + }) + + // AI suggestion handler + ipcMain.handle("get-answer-suggestions", async (_event, question: string, screenshotContext?: string, candidateProfile?: any) => { + try { + if (!deps.answerAssistant || !deps.conversationManager) { + return { success: false, error: "Answer assistant or conversation manager not initialized" }; + } + + const suggestions = await deps.answerAssistant.generateAnswerSuggestions( + question, + deps.conversationManager, + screenshotContext, + candidateProfile + ); + return { success: true, suggestions }; + } catch (error: any) { + console.error("Error generating suggestions:", error); + return { success: false, error: error.message || "Failed to generate suggestions" }; + } + }) + + // Event listeners for conversation events + if 
(deps.conversationManager) { + deps.conversationManager.on('message-added', (message) => { + const mainWindow = deps.getMainWindow(); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('conversation-message-added', message); + } + }); + + deps.conversationManager.on('speaker-changed', (speaker) => { + const mainWindow = deps.getMainWindow(); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('speaker-changed', speaker); + } + }); + + deps.conversationManager.on('message-updated', (message) => { + const mainWindow = deps.getMainWindow(); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('conversation-message-updated', message); + } + }); + + deps.conversationManager.on('conversation-cleared', () => { + const mainWindow = deps.getMainWindow(); + if (mainWindow && !mainWindow.isDestroyed()) { + mainWindow.webContents.send('conversation-cleared'); + } + }); + } } diff --git a/electron/main.ts b/electron/main.ts index 0eae187a..ea751ec1 100644 --- a/electron/main.ts +++ b/electron/main.ts @@ -29,6 +29,9 @@ const state = { screenshotHelper: null as ScreenshotHelper | null, shortcutsHelper: null as ShortcutsHelper | null, processingHelper: null as ProcessingHelper | null, + transcriptionHelper: null as import('./TranscriptionHelper').TranscriptionHelper | null, + conversationManager: null as import('./ConversationManager').ConversationManager | null, + answerAssistant: null as import('./AnswerAssistant').AnswerAssistant | null, // View and state management view: "queue" as "queue" | "solutions" | "debug", @@ -70,6 +73,7 @@ export interface IProcessingHelperDeps { setHasDebugged: (value: boolean) => void getHasDebugged: () => boolean PROCESSING_EVENTS: typeof state.PROCESSING_EVENTS + getConversationManager?: () => import('./ConversationManager').ConversationManager | null } export interface IShortcutsHelperDeps { @@ -107,10 +111,13 @@ export interface IIpcHandlerDeps { moveWindowRight: () 
=> void moveWindowUp: () => void moveWindowDown: () => void + transcriptionHelper?: import('./TranscriptionHelper').TranscriptionHelper + conversationManager?: import('./ConversationManager').ConversationManager + answerAssistant?: import('./AnswerAssistant').AnswerAssistant } // Initialize helpers -function initializeHelpers() { +async function initializeHelpers() { state.screenshotHelper = new ScreenshotHelper(state.view) state.processingHelper = new ProcessingHelper({ getScreenshotHelper, @@ -127,8 +134,19 @@ function initializeHelpers() { deleteScreenshot, setHasDebugged, getHasDebugged, - PROCESSING_EVENTS: state.PROCESSING_EVENTS + PROCESSING_EVENTS: state.PROCESSING_EVENTS, + getConversationManager: () => state.conversationManager } as IProcessingHelperDeps) + + // Initialize conversation and transcription helpers + const { TranscriptionHelper } = await import('./TranscriptionHelper') + const { ConversationManager } = await import('./ConversationManager') + const { AnswerAssistant } = await import('./AnswerAssistant') + + state.transcriptionHelper = new TranscriptionHelper() + state.conversationManager = new ConversationManager() + state.answerAssistant = new AnswerAssistant() + state.shortcutsHelper = new ShortcutsHelper({ getMainWindow, takeScreenshot, @@ -530,7 +548,7 @@ async function initializeApp() { console.log("No API key found in configuration. 
User will need to set up.") } - initializeHelpers() + await initializeHelpers() initializeIpcHandlers({ getMainWindow, setWindowDimensions, @@ -557,7 +575,10 @@ async function initializeApp() { ) ), moveWindowUp: () => moveWindowVertical((y) => y - state.step), - moveWindowDown: () => moveWindowVertical((y) => y + state.step) + moveWindowDown: () => moveWindowVertical((y) => y + state.step), + transcriptionHelper: state.transcriptionHelper, + conversationManager: state.conversationManager, + answerAssistant: state.answerAssistant }) await createWindow() state.shortcutsHelper?.registerGlobalShortcuts() diff --git a/electron/preload.ts b/electron/preload.ts index 85f32156..7feecb48 100644 --- a/electron/preload.ts +++ b/electron/preload.ts @@ -205,7 +205,7 @@ const electronAPI = { // New methods for OpenAI API integration getConfig: () => ipcRenderer.invoke("get-config"), - updateConfig: (config: { apiKey?: string; model?: string; language?: string; opacity?: number }) => + updateConfig: (config: { apiKey?: string; model?: string; language?: string; opacity?: number; apiProvider?: string; extractionModel?: string; solutionModel?: string; debuggingModel?: string; answerModel?: string; speechRecognitionModel?: string; candidateProfile?: any }) => ipcRenderer.invoke("update-config", config), onShowSettings: (callback: () => void) => { const subscription = () => callback() @@ -236,7 +236,61 @@ const electronAPI = { ipcRenderer.removeListener("delete-last-screenshot", subscription) } }, - deleteLastScreenshot: () => ipcRenderer.invoke("delete-last-screenshot") + deleteLastScreenshot: () => ipcRenderer.invoke("delete-last-screenshot"), + + // ============================================ + // Conversation & Transcription Methods + // ============================================ + + // Transcription + transcribeAudio: (audioBuffer: ArrayBuffer, mimeType: string) => + ipcRenderer.invoke("transcribe-audio", audioBuffer, mimeType), + + // Conversation + addConversationMessage: 
(text: string, speaker?: string) => + ipcRenderer.invoke("add-conversation-message", text, speaker), + toggleSpeaker: () => ipcRenderer.invoke("toggle-speaker"), + getConversation: () => ipcRenderer.invoke("get-conversation"), + clearConversation: () => ipcRenderer.invoke("clear-conversation"), + updateConversationMessage: (messageId: string, newText: string) => + ipcRenderer.invoke("update-conversation-message", messageId, newText), + + // AI suggestions + getAnswerSuggestions: (question: string, screenshotContext?: string, candidateProfile?: any) => + ipcRenderer.invoke("get-answer-suggestions", question, screenshotContext, candidateProfile), + + // Event listeners + onConversationMessageAdded: (callback: (message: any) => void) => { + const subscription = (_: any, message: any) => callback(message) + ipcRenderer.on("conversation-message-added", subscription) + return () => { + ipcRenderer.removeListener("conversation-message-added", subscription) + } + }, + + onSpeakerChanged: (callback: (speaker: string) => void) => { + const subscription = (_: any, speaker: string) => callback(speaker) + ipcRenderer.on("speaker-changed", subscription) + return () => { + ipcRenderer.removeListener("speaker-changed", subscription) + } + }, + + onConversationMessageUpdated: (callback: (message: any) => void) => { + const subscription = (_: any, message: any) => callback(message) + ipcRenderer.on("conversation-message-updated", subscription) + return () => { + ipcRenderer.removeListener("conversation-message-updated", subscription) + } + }, + + onConversationCleared: (callback: () => void) => { + const subscription = () => callback() + ipcRenderer.on("conversation-cleared", subscription) + return () => { + ipcRenderer.removeListener("conversation-cleared", subscription) + } + } } // Before exposing the API diff --git a/electron/shortcuts.ts b/electron/shortcuts.ts index a6fa5ebb..e53dc34a 100644 --- a/electron/shortcuts.ts +++ b/electron/shortcuts.ts @@ -106,6 +106,39 @@ export 
class ShortcutsHelper { this.deps.toggleMainWindow() }) + // Recording toggle (Ctrl/Cmd+M) + globalShortcut.register("CommandOrControl+M", async () => { + const mainWindow = this.deps.getMainWindow(); + if (mainWindow && !mainWindow.isDestroyed()) { + console.log("Command/Ctrl + M pressed. Toggling recording."); + try { + await mainWindow.webContents.executeJavaScript(` + (async () => { + const event = new CustomEvent('toggle-recording'); + window.dispatchEvent(event); + })(); + `); + } catch (error) { + console.error("Error toggling recording:", error); + } + } + }); + + // Speaker toggle (Ctrl/Cmd+Shift+M) + globalShortcut.register("CommandOrControl+Shift+M", async () => { + const mainWindow = this.deps.getMainWindow(); + if (mainWindow && !mainWindow.isDestroyed()) { + console.log("Command/Ctrl + Shift + M pressed. Toggling speaker."); + try { + await mainWindow.webContents.executeJavaScript(` + window.electronAPI.toggleSpeaker(); + `); + } catch (error) { + console.error("Error toggling speaker:", error); + } + } + }); + globalShortcut.register("CommandOrControl+Q", () => { console.log("Command/Ctrl + Q pressed. 
Quitting application.") app.quit() diff --git a/package.json b/package.json index 1fffcfb5..b92bc9cf 100644 --- a/package.json +++ b/package.json @@ -124,8 +124,6 @@ "dependencies": { "@anthropic-ai/sdk": "^0.39.0", "@electron/notarize": "^2.3.0", - "@emotion/react": "^11.11.0", - "@emotion/styled": "^11.11.0", "@radix-ui/react-dialog": "^1.1.2", "@radix-ui/react-label": "^2.1.0", "@radix-ui/react-slot": "^1.1.0", @@ -144,7 +142,6 @@ "lucide-react": "^0.460.0", "openai": "^4.28.4", "react": "^18.2.0", - "react-code-blocks": "^0.1.6", "react-dom": "^18.2.0", "react-router-dom": "^6.28.1", "react-syntax-highlighter": "^15.6.1", @@ -202,5 +199,8 @@ "last 1 firefox version", "last 1 safari version" ] + }, + "overrides": { + "prismjs": ">=1.30.0" } } diff --git a/shared/aiModels.ts b/shared/aiModels.ts new file mode 100644 index 00000000..583203b3 --- /dev/null +++ b/shared/aiModels.ts @@ -0,0 +1,390 @@ +// shared/aiModels.ts +// Central configuration for AI providers, models, and related helpers. +// This module is the single source of truth for: +// - Supported providers +// - Available models per provider and category +// - Default models per provider and category +// - Model validation/sanitization +// +// Changing models or providers should only require edits in this file. + +export type APIProvider = "openai" | "gemini" | "anthropic"; + +export type ModelCategoryKey = + | "extractionModel" + | "solutionModel" + | "debuggingModel" + | "answerModel"; + +export interface AIModel { + id: string; + name: string; + description: string; +} + +export interface ModelCategoryDefinition { + key: ModelCategoryKey; + title: string; + description: string; + modelsByProvider: Record; +} + +/** + * Default provider used when no provider is configured or an invalid provider is found. + */ +export const DEFAULT_PROVIDER: APIProvider = "gemini"; + +/** + * Default models per provider and category. 
+ * These are used for:
+ * - Initial config defaults
+ * - Resetting models when provider changes
+ * - Fallbacks when a model is missing in config
+ */
+export const DEFAULT_MODELS: Record<
+  APIProvider,
+  {
+    extractionModel: string;
+    solutionModel: string;
+    debuggingModel: string;
+    answerModel: string;
+    // Speech recognition is supported for OpenAI (Whisper) and Gemini (Audio Understanding)
+    speechRecognitionModel?: string;
+  }
+> = {
+  openai: {
+    extractionModel: "gpt-4o",
+    solutionModel: "gpt-4o",
+    debuggingModel: "gpt-4o",
+    answerModel: "gpt-4o-mini",
+    speechRecognitionModel: "whisper-1",
+  },
+  gemini: {
+    extractionModel: "gemini-3-flash-preview",
+    solutionModel: "gemini-3-flash-preview",
+    debuggingModel: "gemini-3-flash-preview",
+    answerModel: "gemini-3-flash-preview",
+    speechRecognitionModel: "gemini-3-flash-preview",
+  },
+  anthropic: {
+    extractionModel: "claude-3-7-sonnet-20250219",
+    solutionModel: "claude-3-7-sonnet-20250219",
+    debuggingModel: "claude-3-7-sonnet-20250219",
+    answerModel: "claude-3-7-sonnet-20250219",
+  },
+};
+
+/**
+ * Default models specifically for the answer suggestion assistant.
+ * This allows us to evolve those independently from the screenshot
+ * processing defaults if needed.
+ */
+export const DEFAULT_ANSWER_MODELS: Record<APIProvider, string> = {
+  openai: "gpt-4o-mini",
+  gemini: "gemini-3-flash-preview",
+  anthropic: "claude-3-7-sonnet-20250219",
+};
+
+/**
+ * Allowed model ids per provider.
+ * Used for validation/sanitization when reading or updating config.
+ */
+export const ALLOWED_MODELS: Record<APIProvider, string[]> = {
+  openai: [
+    "gpt-4o",
+    "gpt-4o-mini",
+  ],
+  gemini: [
+    // Current Gemini 3 models (preview)
+    "gemini-3-pro-preview",
+    "gemini-3-flash-preview",
+    "gemini-3-pro-image-preview",
+    // "Latest" and stable aliases offered by the Settings model catalogue
+    // (solutionModel/debuggingModel categories below) — they must be accepted
+    // here, otherwise sanitizeModelSelection silently discards the user's pick.
+    "gemini-3-pro-latest",
+    "gemini-3-flash-latest",
+    "gemini-3-pro",
+    "gemini-3-flash",
+    // Legacy models kept for backwards compatibility
+    "gemini-1.5-pro",
+    "gemini-1.5-flash",
+    "gemini-2.0-flash-exp",
+  ],
+  anthropic: [
+    "claude-3-7-sonnet-20250219",
+    "claude-3-5-sonnet-20241022",
+    "claude-3-opus-20240229",
+  ],
+};
+
+/**
+ * Settings UI model catalogue, organized by functional category and provider.
+ */
+export const MODEL_CATEGORIES: ModelCategoryDefinition[] = [
+  {
+    key: "extractionModel",
+    title: "Problem Extraction",
+    description:
+      "Model used to analyze screenshots and extract problem details",
+    modelsByProvider: {
+      openai: [
+        {
+          id: "gpt-4o",
+          name: "gpt-4o",
+          description: "Best overall performance for problem extraction",
+        },
+        {
+          id: "gpt-4o-mini",
+          name: "gpt-4o-mini",
+          description: "Faster, more cost-effective option",
+        },
+      ],
+      gemini: [
+        {
+          id: "gemini-3-pro-preview",
+          name: "Gemini 3 Pro (Preview)",
+          description: "Best overall performance for complex tasks requiring advanced reasoning",
+        },
+        {
+          id: "gemini-3-flash-preview",
+          name: "Gemini 3 Flash (Preview)",
+          description: "Pro-level intelligence at Flash speed and pricing",
+        },
+        {
+          id: "gemini-1.5-pro",
+          name: "Gemini 1.5 Pro",
+          description: "Legacy model - use Gemini 3 for best results",
+        },
+        {
+          id: "gemini-1.5-flash",
+          name: "Gemini 1.5 Flash",
+          description: "Legacy model - use Gemini 3 Flash for best results",
+        },
+      ],
+      anthropic: [
+        {
+          id: "claude-3-7-sonnet-20250219",
+          name: "Claude 3.7 Sonnet",
+          description: "Best overall performance for problem extraction",
+        },
+        {
+          id: "claude-3-5-sonnet-20241022",
+          name: "Claude 3.5 Sonnet",
+          description: "Balanced performance and speed",
+        },
+        {
+          id: "claude-3-opus-20240229",
+          name: "Claude 3 Opus",
+          description:
+            "Top-level intelligence, fluency, and 
understanding", + }, + ], + }, + }, + { + key: "solutionModel", + title: "Solution Generation", + description: "Model used to generate coding solutions", + modelsByProvider: { + openai: [ + { + id: "gpt-4o", + name: "gpt-4o", + description: "Strong overall performance for coding tasks", + }, + { + id: "gpt-4o-mini", + name: "gpt-4o-mini", + description: "Faster, more cost-effective option", + }, + ], + gemini: [ + { + id: "gemini-3-pro-latest", + name: "Gemini 3 Pro (Latest)", + description: "Strong overall performance - latest version", + }, + { + id: "gemini-3-flash-latest", + name: "Gemini 3 Flash (Latest)", + description: "Faster, more cost-effective - latest version", + }, + { + id: "gemini-3-pro", + name: "Gemini 3 Pro", + description: "Stable version", + }, + { + id: "gemini-3-flash", + name: "Gemini 3 Flash", + description: "Stable version", + }, + { + id: "gemini-1.5-pro", + name: "Gemini 1.5 Pro", + description: "Legacy model - use Gemini 3 for best results", + }, + ], + anthropic: [ + { + id: "claude-3-7-sonnet-20250219", + name: "Claude 3.7 Sonnet", + description: "Strong overall performance for coding tasks", + }, + { + id: "claude-3-5-sonnet-20241022", + name: "Claude 3.5 Sonnet", + description: "Balanced performance and speed", + }, + { + id: "claude-3-opus-20240229", + name: "Claude 3 Opus", + description: + "Top-level intelligence, fluency, and understanding", + }, + ], + }, + }, + { + key: "debuggingModel", + title: "Debugging", + description: "Model used to debug and improve solutions", + modelsByProvider: { + openai: [ + { + id: "gpt-4o", + name: "gpt-4o", + description: "Best for analyzing code and error messages", + }, + { + id: "gpt-4o-mini", + name: "gpt-4o-mini", + description: "Faster, more cost-effective option", + }, + ], + gemini: [ + { + id: "gemini-3-pro-latest", + name: "Gemini 3 Pro (Latest)", + description: + "Best for analyzing code and error messages - latest version", + }, + { + id: "gemini-3-flash-latest", + name: "Gemini 3 
Flash (Latest)", + description: "Faster, more cost-effective - latest version", + }, + { + id: "gemini-3-pro", + name: "Gemini 3 Pro", + description: "Stable version", + }, + { + id: "gemini-3-flash", + name: "Gemini 3 Flash", + description: "Stable version", + }, + { + id: "gemini-1.5-pro", + name: "Gemini 1.5 Pro", + description: "Legacy model - use Gemini 3 for best results", + }, + ], + anthropic: [ + { + id: "claude-3-7-sonnet-20250219", + name: "Claude 3.7 Sonnet", + description: "Best for analyzing code and error messages", + }, + { + id: "claude-3-5-sonnet-20241022", + name: "Claude 3.5 Sonnet", + description: "Balanced performance and speed", + }, + { + id: "claude-3-opus-20240229", + name: "Claude 3 Opus", + description: + "Top-level intelligence, fluency, and understanding", + }, + ], + }, + }, + { + key: "answerModel", + title: "Answer Suggestions", + description: "Model used to generate AI answer suggestions for conversation questions", + modelsByProvider: { + openai: [ + { + id: "gpt-4o-mini", + name: "gpt-4o-mini", + description: "Fast and cost-effective for conversation suggestions", + }, + { + id: "gpt-4o", + name: "gpt-4o", + description: "Best overall performance for answer suggestions", + }, + ], + gemini: [ + { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash (Preview)", + description: "Fast and efficient for conversation suggestions", + }, + { + id: "gemini-3-pro-preview", + name: "Gemini 3 Pro (Preview)", + description: "Best performance for complex conversation contexts", + }, + { + id: "gemini-1.5-pro", + name: "Gemini 1.5 Pro", + description: "Legacy model - use Gemini 3 for best results", + }, + { + id: "gemini-1.5-flash", + name: "Gemini 1.5 Flash", + description: "Legacy model - use Gemini 3 Flash for best results", + }, + ], + anthropic: [ + { + id: "claude-3-7-sonnet-20250219", + name: "Claude 3.7 Sonnet", + description: "Best overall performance for answer suggestions", + }, + { + id: "claude-3-5-sonnet-20241022", + name: 
"Claude 3.5 Sonnet", + description: "Balanced performance and speed", + }, + { + id: "claude-3-opus-20240229", + name: "Claude 3 Opus", + description: + "Top-level intelligence, fluency, and understanding", + }, + ], + }, + }, +]; + +/** + * Sanitize a model selection to ensure only allowed models are used. + * If the model is not allowed for the provider, the provider's default + * model for the given category is returned. + */ +export function sanitizeModelSelection( + model: string, + provider: APIProvider, + category: ModelCategoryKey +): string { + const allowed = ALLOWED_MODELS[provider]; + if (allowed.includes(model)) { + return model; + } + + const fallback = DEFAULT_MODELS[provider][category]; + // eslint-disable-next-line no-console + console.warn( + `Invalid ${provider} model specified for ${category}: ${model}. Using default model: ${fallback}` + ); + return fallback; +} + diff --git a/src/App.tsx b/src/App.tsx index f2dd348d..eab3d738 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -1,4 +1,4 @@ -import SubscribedApp from "./_pages/SubscribedApp" +import { lazy, Suspense } from "react" import { UpdateNotification } from "./components/UpdateNotification" import { QueryClient, @@ -14,14 +14,21 @@ import { } from "./components/ui/toast" import { ToastContext } from "./contexts/toast" import { WelcomeScreen } from "./components/WelcomeScreen" -import { SettingsDialog } from "./components/Settings/SettingsDialog" + +// Lazy load heavy components for better code splitting +const SubscribedApp = lazy(() => import("./_pages/SubscribedApp")) +const SettingsDialog = lazy(() => + import("./components/Settings/SettingsDialog").then(module => ({ + default: module.SettingsDialog + })) +) // Create a React Query client const queryClient = new QueryClient({ defaultOptions: { queries: { staleTime: 0, - gcTime: Infinity, + gcTime: 5 * 60 * 1000, // 5 minutes - prevents memory leaks retry: 1, refetchOnWindowFocus: false }, @@ -243,11 +250,20 @@ function App() {
{isInitialized ? ( hasApiKey ? ( - + +
+
+

Loading...

+
+
+ }> + + ) : ( ) @@ -264,11 +280,15 @@ function App() { - {/* Settings Dialog */} - + {/* Settings Dialog - Lazy loaded */} + {isSettingsOpen && ( + + + + )} + import("react-syntax-highlighter").then(module => ({ + default: module.Prism + })) +) import ScreenshotQueue from "../components/Queue/ScreenshotQueue" import SolutionCommands from "../components/Solutions/SolutionCommands" import { Screenshot } from "../types/screenshots" @@ -32,10 +36,20 @@ const CodeSection = ({ ) : (
- Loading syntax highlighter...
}> + { + // Dynamically import style to reduce initial bundle size + // This will be code-split by Vite + try { + const styleModule = require("react-syntax-highlighter/dist/esm/styles/prism") + return styleModule.dracula || {} + } catch { + return {} + } + })()} customStyle={{ maxWidth: "100%", margin: 0, @@ -47,7 +61,8 @@ const CodeSection = ({ wrapLongLines={true} > {code as string} - + + )} diff --git a/src/_pages/Queue.tsx b/src/_pages/Queue.tsx index c9194d5e..db9a3058 100644 --- a/src/_pages/Queue.tsx +++ b/src/_pages/Queue.tsx @@ -2,6 +2,7 @@ import React, { useState, useEffect, useRef } from "react" import { useQuery } from "@tanstack/react-query" import ScreenshotQueue from "../components/Queue/ScreenshotQueue" import QueueCommands from "../components/Queue/QueueCommands" +import { ConversationSection } from "../components/Conversation/ConversationSection" import { useToast } from "../contexts/toast" import { Screenshot } from "../types/screenshots" @@ -137,9 +138,14 @@ const Queue: React.FC = ({ }; return ( -
+
-
+
+ {/* Conversation Section - Works independently of screenshots */} +
+ +
+ + import("react-syntax-highlighter").then(module => ({ + default: module.Prism + })) +) import ScreenshotQueue from "../components/Queue/ScreenshotQueue" @@ -11,6 +16,7 @@ import SolutionCommands from "../components/Solutions/SolutionCommands" import Debug from "./Debug" import { useToast } from "../contexts/toast" import { COMMAND_KEY } from "../utils/platform" +import { ConversationSection } from "../components/Conversation/ConversationSection" export const ContentSection = ({ title, @@ -81,10 +87,21 @@ const SolutionSection = ({ > {copied ? "Copied!" : "Copy"} - Loading syntax highlighter...
}> + { + // Dynamically import style to reduce initial bundle size + // This will be code-split by Vite + try { + // Use dynamic import for better tree-shaking + const styleModule = require("react-syntax-highlighter/dist/esm/styles/prism") + return styleModule.dracula || {} + } catch { + return {} + } + })()} customStyle={{ maxWidth: "100%", margin: 0, @@ -96,7 +113,8 @@ const SolutionSection = ({ wrapLongLines={true} > {content as string} - + +
)}
@@ -500,6 +518,11 @@ const Solutions: React.FC = ({ setLanguage={setLanguage} /> + {/* Conversation Section */} +
+ +
+ {/* Main Content - Modified width constraints */}
diff --git a/src/components/Conversation/ConversationCommands.tsx b/src/components/Conversation/ConversationCommands.tsx new file mode 100644 index 00000000..8b7f7923 --- /dev/null +++ b/src/components/Conversation/ConversationCommands.tsx @@ -0,0 +1,199 @@ +/** + * ConversationCommands - Command bar for conversation/recording features + * Follows the same design pattern as QueueCommands and SolutionCommands + */ +import React, { useState, useEffect, useRef } from "react"; +import { useToast } from "../../contexts/toast"; +import { COMMAND_KEY } from "../../utils/platform"; + +interface ConversationCommandsProps { + onTooltipVisibilityChange: (visible: boolean, height: number) => void; + isRecording: boolean; + isProcessing: boolean; + recordingDuration: number; + currentSpeaker: 'interviewer' | 'interviewee'; + onStartRecording: () => Promise; + onStopRecording: () => Promise; + onToggleSpeaker: () => Promise; + onClearConversation: () => Promise; +} + +export const ConversationCommands: React.FC = ({ + onTooltipVisibilityChange, + isRecording, + isProcessing, + recordingDuration, + currentSpeaker, + onStartRecording, + onStopRecording, + onToggleSpeaker, + onClearConversation, +}) => { + const [isTooltipVisible, setIsTooltipVisible] = useState(false); + const tooltipRef = useRef(null); + const { showToast } = useToast(); + + useEffect(() => { + let tooltipHeight = 0; + if (tooltipRef.current && isTooltipVisible) { + tooltipHeight = tooltipRef.current.offsetHeight + 10; + } + onTooltipVisibilityChange(isTooltipVisible, tooltipHeight); + }, [isTooltipVisible, onTooltipVisibilityChange]); + + const handleMouseEnter = () => { + setIsTooltipVisible(true); + }; + + const handleMouseLeave = () => { + setIsTooltipVisible(false); + }; + + const formatDuration = (seconds: number) => { + const mins = Math.floor(seconds / 60); + const secs = seconds % 60; + return `${mins}:${secs.toString().padStart(2, '0')}`; + }; + + const handleToggleRecording = async () => { + if 
(isRecording) { + await onStopRecording(); + } else { + await onStartRecording(); + } + }; + + return ( +
+
+
+ {/* Start/Stop Recording */} +
+ + {isRecording ? `Stop (${formatDuration(recordingDuration)})` : 'Start Recording'} + +
+ + +
+
+ + {/* Toggle Speaker Mode */} +
+ + {currentSpeaker === 'interviewer' ? 'Interviewer' : 'You'} + +
+ + + +
+
+ + {/* Clear Conversation */} +
+ Clear +
+ + {/* Keyboard Shortcuts Tooltip Trigger */} +
+ Shortcuts + + {/* Tooltip Content */} + {isTooltipVisible && ( +
+ {/* Add transparent bridge */} +
+
+
+

+ Keyboard Shortcuts +

+
+ {/* Start/Stop Recording */} +
+
+ Start/Stop Recording +
+ + {COMMAND_KEY} + + + M + +
+
+

+ Record interview conversation for transcription. +

+
+ + {/* Toggle Speaker Mode */} +
+
+ Toggle Speaker Mode +
+ + {COMMAND_KEY} + + + Shift + + + M + +
+
+

+ Switch between Interviewer and You mode. +

+
+
+
+
+
+ )} +
+ + {isProcessing && ( + Processing... + )} +
+
+
+ ); +}; diff --git a/src/components/Conversation/ConversationSection.tsx b/src/components/Conversation/ConversationSection.tsx new file mode 100644 index 00000000..9aa33464 --- /dev/null +++ b/src/components/Conversation/ConversationSection.tsx @@ -0,0 +1,380 @@ +/** + * ConversationSection - UI component for conversation recording and AI suggestions + * Follows Single Responsibility Principle - only handles conversation UI + * Uses existing ContentSection pattern for consistency + * Integrates with screenshot system for cohesive experience + */ +import React, { useState, useEffect, useRef } from 'react'; +import { useQueryClient } from '@tanstack/react-query'; +import { AudioRecorder } from '../../utils/audioRecorder'; +import { ConversationCommands } from './ConversationCommands'; + +interface ConversationMessage { + id: string; + speaker: 'interviewer' | 'interviewee'; + text: string; + timestamp: number; + edited?: boolean; +} + +interface AISuggestion { + suggestions: string[]; + reasoning: string; +} + +// Reuse the same ContentSection style from Solutions.tsx for consistency +const ContentSection = ({ + title, + content, + isLoading +}: { + title: string; + content: React.ReactNode; + isLoading: boolean; +}) => ( +
+

+ {title} +

+ {isLoading ? ( +
+

+ Processing... +

+
+ ) : ( +
+ {content} +
+ )} +
+); + +export const ConversationSection: React.FC = () => { + const queryClient = useQueryClient(); + const [messages, setMessages] = useState([]); + const [isRecording, setIsRecording] = useState(false); + const [currentSpeaker, setCurrentSpeaker] = useState<'interviewer' | 'interviewee'>('interviewee'); + const [aiSuggestions, setAiSuggestions] = useState(null); + const [isProcessing, setIsProcessing] = useState(false); + const [recordingDuration, setRecordingDuration] = useState(0); + const [tooltipHeight, setTooltipHeight] = useState(0); + const messagesEndRef = useRef(null); + const audioRecorderRef = useRef(null); + const durationIntervalRef = useRef(null); + const processingCountRef = useRef(0); + + // Use ref to track recording state for event listener + const isRecordingRef = useRef(false); + + const handleTooltipVisibilityChange = (visible: boolean, height: number) => { + setTooltipHeight(height); + }; + + const handleClearConversation = async () => { + try { + await window.electronAPI.clearConversation(); + } catch (error) { + console.error('Failed to clear conversation:', error); + } + }; + + useEffect(() => { + isRecordingRef.current = isRecording; + }, [isRecording]); + + useEffect(() => { + loadConversation(); + + const unsubscribeMessageAdded = window.electronAPI.onConversationMessageAdded((message: ConversationMessage) => { + setMessages(prev => [...prev, message]); + scrollToBottom(); + }); + + const unsubscribeSpeakerChanged = window.electronAPI.onSpeakerChanged((speaker: string) => { + setCurrentSpeaker(speaker as 'interviewer' | 'interviewee'); + }); + + const unsubscribeMessageUpdated = window.electronAPI.onConversationMessageUpdated((message: ConversationMessage) => { + setMessages(prev => prev.map(msg => msg.id === message.id ? 
message : msg)); + }); + + const unsubscribeCleared = window.electronAPI.onConversationCleared(() => { + setMessages([]); + setAiSuggestions(null); + }); + + // Listen for keyboard shortcut to toggle recording + const handleToggleRecording = async () => { + // Check actual recording state using ref to get latest value + const currentIsRecording = isRecordingRef.current || (audioRecorderRef.current?.getIsRecording() || false); + if (currentIsRecording) { + await handleStopRecording(); + } else { + await handleStartRecording(); + } + }; + + window.addEventListener('toggle-recording', handleToggleRecording); + + return () => { + unsubscribeMessageAdded(); + unsubscribeSpeakerChanged(); + unsubscribeMessageUpdated(); + unsubscribeCleared(); + window.removeEventListener('toggle-recording', handleToggleRecording); + if (durationIntervalRef.current) { + clearInterval(durationIntervalRef.current); + } + }; + }, []); + + const scrollToBottom = () => { + messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); + }; + + const loadConversation = async () => { + try { + const result = await window.electronAPI.getConversation(); + if (result.success) { + setMessages(result.messages); + scrollToBottom(); + } + } catch (error) { + console.error('Failed to load conversation:', error); + } + }; + + const handleStartRecording = async () => { + try { + // Check if already recording + if (audioRecorderRef.current?.getIsRecording()) { + console.log('Already recording'); + return; + } + + if (!audioRecorderRef.current) { + audioRecorderRef.current = new AudioRecorder(); + } + + await audioRecorderRef.current.startRecording(); + setIsRecording(true); + isRecordingRef.current = true; + setRecordingDuration(0); + + // Start duration counter + durationIntervalRef.current = setInterval(() => { + setRecordingDuration(prev => prev + 1); + }, 1000); + } catch (error: any) { + console.error('Failed to start recording:', error); + alert(error.message || 'Failed to start recording. 
Please check microphone permissions.'); + } + }; + + const handleStopRecording = async () => { + // Check recorder state directly instead of React state to avoid stale closures + if (!audioRecorderRef.current || !audioRecorderRef.current.getIsRecording()) { + console.log('Not recording, cannot stop'); + return; + } + + setIsRecording(false); + isRecordingRef.current = false; + + if (durationIntervalRef.current) { + clearInterval(durationIntervalRef.current); + durationIntervalRef.current = null; + } + + try { + const audioBlob = await audioRecorderRef.current.stopRecording(); + const speakerAtStop = currentSpeaker; + setRecordingDuration(0); + + // Kick off transcription/processing asynchronously so UI stays responsive + void processRecording(audioBlob, speakerAtStop); + + // Auto-toggle speaker for the next recording cycle + void toggleSpeakerForNextTurn(); + } catch (error: any) { + console.error('Failed to stop recording:', error); + alert(error.message || 'Failed to stop recording'); + } + }; + + const processRecording = async (audioBlob: Blob, speaker: 'interviewer' | 'interviewee') => { + updateProcessingStatus(1); + try { + const arrayBuffer = await audioBlob.arrayBuffer(); + + const transcribeResult = await window.electronAPI.transcribeAudio(arrayBuffer, audioBlob.type); + + if (transcribeResult.success && transcribeResult.result) { + const text = transcribeResult.result.text; + + await window.electronAPI.addConversationMessage(text, speaker); + + if (speaker === 'interviewer') { + await fetchAISuggestions(text); + } + } + } catch (error: any) { + console.error('Failed to process recording:', error); + alert(error.message || 'Failed to process recording'); + } finally { + updateProcessingStatus(-1); + } + }; + + const updateProcessingStatus = (delta: number) => { + processingCountRef.current = Math.max(0, processingCountRef.current + delta); + setIsProcessing(processingCountRef.current > 0); + }; + + const fetchAISuggestions = async (question: string) => { 
+ try { + // Get problem statement from query cache if available (from screenshots) + const problemStatement = queryClient.getQueryData(['problem_statement']) as any; + let screenshotContext: string | undefined; + + if (problemStatement?.problem_statement) { + screenshotContext = `Problem Statement: ${problemStatement.problem_statement}\nConstraints: ${problemStatement.constraints || 'N/A'}\nExample Input: ${problemStatement.example_input || 'N/A'}\nExample Output: ${problemStatement.example_output || 'N/A'}`; + } + + // Get candidate profile from config + const config = await window.electronAPI.getConfig(); + const candidateProfile = (config as any).candidateProfile; + + const result = await window.electronAPI.getAnswerSuggestions(question, screenshotContext, candidateProfile); + if (result.success && result.suggestions) { + setAiSuggestions(result.suggestions); + } + } catch (error: any) { + console.error('Failed to get AI suggestions:', error); + // Don't show alert for suggestion errors - it's not critical + } + }; + + const handleToggleSpeaker = async () => { + try { + const result = await window.electronAPI.toggleSpeaker(); + if (result.success) { + setCurrentSpeaker(result.speaker); + // Don't clear suggestions - user needs to see them when preparing their answer! + } + } catch (error) { + console.error('Failed to toggle speaker:', error); + } + }; + + const toggleSpeakerForNextTurn = async () => { + try { + const result = await window.electronAPI.toggleSpeaker(); + if (result.success) { + setCurrentSpeaker(result.speaker); + } + } catch (error) { + console.error('Failed to auto-toggle speaker:', error); + } + }; + + const formatTime = (timestamp: number) => { + return new Date(timestamp).toLocaleTimeString([], { + hour: '2-digit', + minute: '2-digit' + }); + }; + + const formatDuration = (seconds: number) => { + const mins = Math.floor(seconds / 60); + const secs = seconds % 60; + return `${mins}:${secs.toString().padStart(2, '0')}`; + }; + + return ( +
+ {/* Conversation Commands Bar - Matches QueueCommands/SolutionCommands style */} + + + {/* Scrollable Conversation Area - Takes remaining space above AI suggestions */} +
+ {messages.length > 0 && ( + + {messages.map((message) => ( +
+
+
+ {message.speaker === 'interviewer' ? '👤 Interviewer' : '🎤 You'} +
+
{message.text}
+
+ {formatTime(message.timestamp)} +
+
+
+ ))} +
+ } + isLoading={false} + /> + )} +
+
+ + {/* AI Suggestions - Fixed at bottom, always visible, never scrolls */} + {aiSuggestions && ( +
+ +
+ {aiSuggestions.suggestions.map((suggestion, index) => ( +
+
+
{suggestion}
+
+ ))} +
+
+ } + isLoading={false} + /> +
+ )} +
+ ); +}; diff --git a/src/components/Queue/QueueCommands.tsx b/src/components/Queue/QueueCommands.tsx index 88d6c283..f33fe518 100644 --- a/src/components/Queue/QueueCommands.tsx +++ b/src/components/Queue/QueueCommands.tsx @@ -321,6 +321,74 @@ const QueueCommands: React.FC = ({

+ {/* Start/Stop Recording Command */} +
{ + try { + const event = new CustomEvent('toggle-recording'); + window.dispatchEvent(event); + } catch (error) { + console.error("Error toggling recording:", error) + showToast( + "Error", + "Failed to toggle recording", + "error" + ) + } + }} + > +
+ Start/Stop Recording +
+ + {COMMAND_KEY} + + + M + +
+
+

+ Record interview conversation for transcription. +

+
+ + {/* Toggle Speaker Mode Command */} +
{ + try { + await window.electronAPI.toggleSpeaker(); + } catch (error) { + console.error("Error toggling speaker:", error) + showToast( + "Error", + "Failed to toggle speaker mode", + "error" + ) + } + }} + > +
+ Toggle Speaker Mode +
+ + {COMMAND_KEY} + + + Shift + + + M + +
+
+

+ Switch between Interviewer and You mode. +

+
+ {/* Solve Command */}
void; +} + +export const CandidateProfileSection: React.FC = ({ + profile, + onProfileChange, +}) => { + const [localProfile, setLocalProfile] = useState(profile); + + const handleFieldChange = (field: keyof CandidateProfile, value: string) => { + const updated = { ...localProfile, [field]: value }; + setLocalProfile(updated); + onProfileChange(updated); + }; + + return ( +
+
+ + handleFieldChange('name', e.target.value)} + placeholder="Your name" + className="bg-black/30 border-white/10 text-white placeholder:text-white/40" + /> +
+ +
+ +