Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cloud/packages/cloud/src/models/user.model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export interface UserI extends Document {
bypassAudioEncoding: boolean;
metricSystemEnabled: boolean;
enforceLocalTranscription: boolean;
localSttLanguage: string;
};
location?: Location;
locationSubscriptions?: Map<string, { rate: string }>;
Expand Down Expand Up @@ -181,6 +182,7 @@ const UserSchema = new Schema<UserI>(
bypassVad: { type: Boolean, default: false },
bypassAudioEncoding: { type: Boolean, default: false },
enforceLocalTranscription: { type: Boolean, default: false },
localSttLanguage: { type: String, default: null },
},
default: function () {
return {
Expand All @@ -197,6 +199,7 @@ const UserSchema = new Schema<UserI>(
bypassVad: false,
bypassAudioEncoding: false,
enforceLocalTranscription: false,
localSttLanguage: null,
};
},
},
Expand Down
99 changes: 59 additions & 40 deletions cloud/packages/cloud/src/services/session/MicrophoneManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,15 @@ export class MicrophoneManager {

// Cached subscription state to avoid expensive repeated lookups.
// NOTE(review): this is a rendered diff with the +/- markers stripped, so both
// the removed fields (hasPCM, hasTranscription, hasMedia) and their
// replacements (needsPcm, needsTranslation, transcriptionLanguages,
// needsMedia) are listed; presumably only the needs* set exists after the
// change — confirm against the real file.
private cachedSubscriptionState: {
hasPCM: boolean;
hasTranscription: boolean;
hasMedia: boolean;
// Whether any PCM subscription is active.
needsPcm: boolean;
// Whether any translation subscription is active.
needsTranslation: boolean;
// Languages that currently have a transcription subscription.
transcriptionLanguages: string[];
// Whether any of the above requires audio at all.
needsMedia: boolean;
} = {
// Initial state: nothing subscribed.
hasPCM: false,
hasTranscription: false,
hasMedia: false,
needsPcm: false,
needsTranslation: false,
transcriptionLanguages: [],
needsMedia: false,
};

constructor(session: UserSession) {
Expand Down Expand Up @@ -221,11 +223,12 @@ export class MicrophoneManager {
*/
private updateCachedSubscriptionState(): void {
const state =
this.session.subscriptionManager.hasPCMTranscriptionSubscriptions();
this.session.subscriptionManager.getMediaSubscriptionDetails();
this.cachedSubscriptionState = {
hasPCM: state.hasPCM,
hasTranscription: state.hasTranscription,
hasMedia: state.hasMedia,
needsPcm: state.needsPcm,
needsTranslation: state.needsTranslation,
transcriptionLanguages: state.transcriptionLanguages,
needsMedia: state.needsMedia,
};
this.logger.debug(
"Updated cached subscription state",
Expand All @@ -239,28 +242,40 @@ export class MicrophoneManager {
*/
private shouldBypassVadForPCM(): boolean {
// Use cached state instead of calling service
// NOTE(review): both sides of the diff are shown here; the first return is
// presumably the removed line (hasPCM) and the second its replacement
// (needsPcm) — only one of them exists in the real file.
return this.cachedSubscriptionState.hasPCM;
return this.cachedSubscriptionState.needsPcm;
}

/**
 * Builds the list of audio payload kinds to request, drawn from "pcm",
 * "transcription" and "pcm_or_transcription".
 *
 * NOTE(review): this is a rendered diff with the +/- markers stripped, so the
 * removed implementation (hasPCM / hasTranscription) and the added one
 * (needsPcm / needsTranslation / transcriptionLanguages) are interleaved
 * below. The rules listed here are a reading of the added lines only —
 * confirm against the real file:
 *  - "pcm" when PCM or translation is needed, when more than one
 *    transcription language is subscribed, or when the single subscribed
 *    language differs from the local transcription language;
 *  - "transcription" when the single subscribed language equals the local
 *    one and cloud STT is reported down;
 *  - "pcm_or_transcription" when PCM is not needed, the single subscribed
 *    language equals the local one, and cloud STT is not down.
 *
 * @param needsPcm - whether any PCM subscription is active
 * @param needsTranslation - whether any translation subscription is active
 * @param transcriptionLanguages - languages with an active transcription subscription
 * @returns the required data kinds; may be empty when none of the rules match
 */
calculateRequiredData(
hasPCM: boolean,
hasTranscription: boolean,
needsPcm: boolean,
needsTranslation: boolean,
transcriptionLanguages: string[]
): Array<"pcm" | "transcription" | "pcm_or_transcription"> {
const requiredData: Array<
"pcm" | "transcription" | "pcm_or_transcription"
> = [];
// NOTE(review): hard-coded. This same change introduces a `localSttLanguage`
// user setting — confirm whether that value should be used here instead.
const localTranscriptionLanguage = "en-US";
const isCloudSttDown = this.session.transcriptionManager.isCloudSTTDown();
if (hasPCM) {
const transcriptionLanguageSubscriptions = transcriptionLanguages.length;
// NOTE(review): "Languge" is a typo for "Language" in this local's name.
let isLocalTranscriptionLangugeSame = false;
// NOTE(review): loose `==` is used throughout; `===` would be the usual choice.
if (transcriptionLanguageSubscriptions == 1) {
isLocalTranscriptionLangugeSame = transcriptionLanguages[0] == localTranscriptionLanguage;
}

// Raw PCM is required whenever local transcription alone cannot serve every subscriber.
if (
needsPcm ||
transcriptionLanguageSubscriptions > 1 ||
needsTranslation ||
(transcriptionLanguageSubscriptions == 1 && !isLocalTranscriptionLangugeSame)
) {
requiredData.push("pcm");
if (hasTranscription && isCloudSttDown) {
requiredData.push("transcription");
}
} else {
if (hasTranscription && isCloudSttDown) {
requiredData.push("transcription");
} else {
requiredData.push("pcm_or_transcription");
}
}

// Cloud STT is down: ask for transcription data when the one subscribed language matches the local one.
if (transcriptionLanguageSubscriptions == 1 && isLocalTranscriptionLangugeSame && isCloudSttDown) {
requiredData.push("transcription");
}

// Cloud STT is up and no raw PCM is needed: either payload kind is acceptable.
if (!needsPcm && (transcriptionLanguageSubscriptions == 1 && isLocalTranscriptionLangugeSame && !isCloudSttDown)) {
requiredData.push("pcm_or_transcription");
}

return requiredData;
Expand Down Expand Up @@ -302,12 +317,13 @@ export class MicrophoneManager {
// Update cache before using it
this.updateCachedSubscriptionState();

const hasMediaSubscriptions = this.cachedSubscriptionState.hasMedia;
const needsMediaSubscriptions = this.cachedSubscriptionState.needsMedia;
const requiredData = this.calculateRequiredData(
this.cachedSubscriptionState.hasPCM,
this.cachedSubscriptionState.hasTranscription,
this.cachedSubscriptionState.needsPcm,
this.cachedSubscriptionState.needsTranslation,
this.cachedSubscriptionState.transcriptionLanguages,
);
this.updateState(hasMediaSubscriptions, requiredData);
this.updateState(needsMediaSubscriptions, requiredData);
}
}

Expand All @@ -326,16 +342,17 @@ export class MicrophoneManager {
// Update cache when subscriptions change
this.updateCachedSubscriptionState();

const hasMediaSubscriptions = this.cachedSubscriptionState.hasMedia;
const needsMediaSubscriptions = this.cachedSubscriptionState.needsMedia;
const requiredData = this.calculateRequiredData(
this.cachedSubscriptionState.hasPCM,
this.cachedSubscriptionState.hasTranscription,
this.cachedSubscriptionState.needsPcm,
this.cachedSubscriptionState.needsTranslation,
this.cachedSubscriptionState.transcriptionLanguages,
);
this.logger.info(
`Subscription changed, media subscriptions: ${hasMediaSubscriptions}`,
`Subscription changed, media subscriptions: ${needsMediaSubscriptions}`,
);
// Apply holddown when turning mic off to avoid flapping
if (hasMediaSubscriptions) {
if (needsMediaSubscriptions) {
// Cancel any pending mic-off holddown
if (this.micOffHolddownTimer) {
clearTimeout(this.micOffHolddownTimer);
Expand All @@ -349,10 +366,11 @@ export class MicrophoneManager {
this.micOffHolddownTimer = setTimeout(() => {
// Re-evaluate before actually turning off
this.updateCachedSubscriptionState();
const stillNoMedia = !this.cachedSubscriptionState.hasMedia;
const stillNoMedia = !this.cachedSubscriptionState.needsMedia;
const finalRequiredData = this.calculateRequiredData(
this.cachedSubscriptionState.hasPCM,
this.cachedSubscriptionState.hasTranscription,
this.cachedSubscriptionState.needsPcm,
this.cachedSubscriptionState.needsTranslation,
this.cachedSubscriptionState.transcriptionLanguages,
);
if (stillNoMedia) {
this.updateState(false, finalRequiredData);
Expand Down Expand Up @@ -382,7 +400,7 @@ export class MicrophoneManager {
private updateKeepAliveTimer(): void {
// Check if we should have a keep-alive timer running using cached state
const shouldHaveKeepAlive =
this.enabled && this.cachedSubscriptionState.hasMedia;
this.enabled && this.cachedSubscriptionState.needsMedia;

if (shouldHaveKeepAlive && !this.keepAliveTimer) {
// Start keep-alive timer
Expand All @@ -394,7 +412,7 @@ export class MicrophoneManager {
this.session.websocket.readyState === WebSocket.OPEN
) {
// Use cached state for the check
if (this.cachedSubscriptionState.hasMedia && this.enabled) {
if (this.cachedSubscriptionState.needsMedia && this.enabled) {
this.logger.debug("Sending microphone keep-alive");
this.sendStateChangeToGlasses(
this.lastSentState,
Expand Down Expand Up @@ -436,7 +454,7 @@ export class MicrophoneManager {

// Check if we should NOT be receiving audio using cached state
const shouldMicBeOff =
!this.enabled || !this.cachedSubscriptionState.hasMedia;
!this.enabled || !this.cachedSubscriptionState.needsMedia;

if (shouldMicBeOff) {
// We're receiving audio when we shouldn't be
Expand All @@ -446,8 +464,9 @@ export class MicrophoneManager {

// Send mic off immediately
const requiredData = this.calculateRequiredData(
this.cachedSubscriptionState.hasPCM,
this.cachedSubscriptionState.hasTranscription,
this.cachedSubscriptionState.needsPcm,
this.cachedSubscriptionState.needsTranslation,
this.cachedSubscriptionState.transcriptionLanguages,
);
this.sendStateChangeToGlasses(false, requiredData);

Expand Down
57 changes: 30 additions & 27 deletions cloud/packages/cloud/src/services/session/SubscriptionManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ export class SubscriptionManager {

// Cached aggregates for O(1) reads
// NOTE(review): rendered diff without +/- markers; transcriptionLikeSubscriptionCount
// is presumably the removed field, replaced by the two fields that follow it.
// Count of active PCM subscriptions.
private pcmSubscriptionCount: number = 0;
private transcriptionLikeSubscriptionCount: number = 0; // transcription/translation incl. language streams
// Count of active translation subscriptions (plain or language-specific streams).
private translationSubscriptionCount: number = 0;
// Languages that currently have a transcription subscription.
private transcriptionLanguagesSet: Set<string> = new Set();
// Per-stream subscription counts for language-specific streams.
private languageStreamCounts: Map<ExtendedStreamType, number> = new Map();

constructor(userSession: UserSession) {
Expand Down Expand Up @@ -136,15 +137,17 @@ export class SubscriptionManager {
return result;
}

/**
 * Summarizes which audio-related subscriptions are active, read from the
 * cached counters and language set rather than by scanning subscriptions.
 *
 * NOTE(review): this is a rendered diff with the +/- markers stripped. The
 * removed method (hasPCMTranscriptionSubscriptions) and the added one
 * (getMediaSubscriptionDetails) are interleaved below; presumably only the
 * latter exists after the change — confirm against the real file.
 *
 * @returns needsPcm (any PCM subscription), needsTranslation (any translation
 * subscription), transcriptionLanguages (array copy of the subscribed
 * transcription languages) and needsMedia (true when any of the three holds).
 */
hasPCMTranscriptionSubscriptions(): {
hasMedia: boolean;
hasPCM: boolean;
hasTranscription: boolean;
getMediaSubscriptionDetails(): {
needsMedia: boolean,
needsPcm: boolean,
needsTranslation: boolean,
transcriptionLanguages: string[],
} {
const hasPCM = this.pcmSubscriptionCount > 0;
const hasTranscription = this.transcriptionLikeSubscriptionCount > 0;
const hasMedia = hasPCM || hasTranscription;
return { hasMedia, hasPCM, hasTranscription };
const needsPcm = this.pcmSubscriptionCount > 0;
const needsTranslation = this.translationSubscriptionCount > 0;
// Copy the Set into an array for the caller.
const transcriptionLanguages: string[] = Array.from(this.transcriptionLanguagesSet);
// Audio is needed if anything wants PCM, translation, or transcription in any language.
const needsMedia = needsPcm || needsTranslation || transcriptionLanguages.length > 0;
return { needsMedia, needsPcm, needsTranslation, transcriptionLanguages };
}

cacheCalendarEvent(event: any): void {
Expand Down Expand Up @@ -452,26 +455,26 @@ export class SubscriptionManager {
return;
}

// Direct transcription/translation
if (sub === StreamType.TRANSCRIPTION || sub === StreamType.TRANSLATION) {
this.transcriptionLikeSubscriptionCount += isAdd ? 1 : -1;
if (this.transcriptionLikeSubscriptionCount < 0)
this.transcriptionLikeSubscriptionCount = 0;
return;
const languageStreamInfo = parseLanguageStream(sub as string);
const isLangStream = languageStreamInfo !== null;

if (sub === StreamType.TRANSLATION || (isLangStream && languageStreamInfo.baseType === StreamType.TRANSLATION)) {
this.translationSubscriptionCount += isAdd ? 1 : -1;
if (this.translationSubscriptionCount < 0) this.translationSubscriptionCount = 0;
}

// Language-specific streams
if (isLanguageStream(sub)) {
const langInfo = parseLanguageStream(sub as string);
if (
langInfo &&
(langInfo.type === StreamType.TRANSCRIPTION ||
langInfo.type === StreamType.TRANSLATION)
) {
this.transcriptionLikeSubscriptionCount += isAdd ? 1 : -1;
if (this.transcriptionLikeSubscriptionCount < 0)
this.transcriptionLikeSubscriptionCount = 0;
if (sub === StreamType.TRANSCRIPTION || (isLangStream && languageStreamInfo.baseType === StreamType.TRANSCRIPTION)) {
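// Track the language of this transcription subscription in
// transcriptionLanguagesSet; a stream without a parsed transcribeLanguage
// falls back to "en-US".
// NOTE(review): a Set is not reference-counted, so one unsubscribe removes the
// language even if another subscription still uses it — confirm this is intended.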
const transcriptionLanguage = languageStreamInfo?.transcribeLanguage || "en-US";
if (isAdd) {
this.transcriptionLanguagesSet.add(transcriptionLanguage);
} else {
this.transcriptionLanguagesSet.delete(transcriptionLanguage);
}
}

// Language-specific streams
if (isLangStream) {
const prev = this.languageStreamCounts.get(sub) || 0;
const next = prev + (isAdd ? 1 : -1);
if (next <= 0) this.languageStreamCounts.delete(sub);
Expand All @@ -481,4 +484,4 @@ export class SubscriptionManager {
}
}

export default SubscriptionManager;
export default SubscriptionManager;
11 changes: 6 additions & 5 deletions cloud/packages/cloud/src/services/session/UserSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -522,12 +522,13 @@ export class UserSession {
this.subscriptionManager.getAppSubscriptions(packageName);
}

const hasPCMTranscriptionSubscriptions =
this.subscriptionManager.hasPCMTranscriptionSubscriptions();
const requiresAudio = hasPCMTranscriptionSubscriptions.hasMedia;
const mediaSubscriptionDetails =
this.subscriptionManager.getMediaSubscriptionDetails();
const requiresAudio = mediaSubscriptionDetails.needsMedia;
const requiredData = this.microphoneManager.calculateRequiredData(
hasPCMTranscriptionSubscriptions.hasPCM,
hasPCMTranscriptionSubscriptions.hasTranscription,
mediaSubscriptionDetails.needsPcm,
mediaSubscriptionDetails.needsTranslation,
mediaSubscriptionDetails.transcriptionLanguages,
);
// Side-effect: update mic state to reflect current needs
this.microphoneManager.updateState(requiresAudio, requiredData);
Expand Down
11 changes: 6 additions & 5 deletions cloud/packages/cloud/src/services/session/session.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,13 @@ export class SessionService {

// Calculate streams that need to be active
// const requiresAudio = subscriptionService.hasMediaSubscriptions(userId);
const hasPCMTranscriptionSubscriptions =
userSession.subscriptionManager.hasPCMTranscriptionSubscriptions();
const requiresAudio = hasPCMTranscriptionSubscriptions.hasMedia;
const mediaSubscriptionDetails =
userSession.subscriptionManager.getMediaSubscriptionDetails();
const requiresAudio = mediaSubscriptionDetails.needsMedia;
const requiredData = userSession.microphoneManager.calculateRequiredData(
hasPCMTranscriptionSubscriptions.hasPCM,
hasPCMTranscriptionSubscriptions.hasTranscription,
mediaSubscriptionDetails.needsPcm,
mediaSubscriptionDetails.needsTranslation,
mediaSubscriptionDetails.transcriptionLanguages,
);
userSession.microphoneManager.updateState(requiresAudio, requiredData); // TODO(isaiah): Feels like an odd place to put it, but it works for now.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ export class GlassesWebSocketService {
bypassVad: coreInfo.bypass_vad_for_debugging,
bypassAudioEncoding: coreInfo.bypass_audio_encoding_for_debugging,
enforceLocalTranscription: coreInfo.enforce_local_transcription,
localSttLanguage: coreInfo.local_stt_language,
};

logger.debug({ newSettings }, "🔥🔥🔥: newSettings:");
Expand Down
3 changes: 3 additions & 0 deletions mobile/src/managers/Settings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export const SETTINGS_KEYS = {
bypass_audio_encoding_for_debugging: "bypass_audio_encoding_for_debugging",
metric_system_enabled: "metric_system_enabled",
enforce_local_transcription: "enforce_local_transcription",
local_stt_language: "local_stt_language",
button_press_mode: "button_press_mode",
default_wearable: "default_wearable",
device_name: "device_name",
Expand Down Expand Up @@ -76,6 +77,8 @@ const DEFAULT_SETTINGS = {
[SETTINGS_KEYS.bypass_audio_encoding_for_debugging]: false,
[SETTINGS_KEYS.metric_system_enabled]: false,
[SETTINGS_KEYS.enforce_local_transcription]: false,
[SETTINGS_KEYS.local_stt_language]: null,

[SETTINGS_KEYS.button_press_mode]: "photo",
[SETTINGS_KEYS.default_wearable]: null,
[SETTINGS_KEYS.device_name]: "",
Expand Down
2 changes: 2 additions & 0 deletions mobile/src/services/STTModelManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {Platform} from "react-native"
import {NativeModules} from "react-native"
import {TarBz2Extractor} from "./TarBz2Extractor"
import bridge from "@/bridge/MantleBridge"
import settings, { SETTINGS_KEYS } from "@/managers/Settings"

const {BridgeModule, FileProviderModule} = NativeModules

Expand Down Expand Up @@ -389,6 +390,7 @@ class STTModelManager {

this.currentModelId = modelId
const modelPath = this.getModelPath(modelId)
await settings.set(SETTINGS_KEYS.local_stt_language, model.languageCode)
await this.setNativeModelPath(modelPath, model.languageCode)
}

Expand Down
Loading