Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Bookmarkly.App/App.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public partial class App : Application
{
private Window? _window;

public static Window? MainWindow { get; private set; }

/// <summary>
/// Initializes the singleton application object. This is the first line of authored code
/// executed, and as such is the logical equivalent of main() or WinMain().
Expand All @@ -44,6 +46,7 @@ public App()
/// <summary>
/// Creates the application's main window, records it in both the private field and the
/// static <c>MainWindow</c> property, and then activates it.
/// </summary>
/// <param name="args">Launch arguments supplied by the framework; not used here.</param>
protected override void OnLaunched(Microsoft.UI.Xaml.LaunchActivatedEventArgs args)
{
    Window launchedWindow = new MainWindow();

    _window = launchedWindow;
    MainWindow = launchedWindow;

    launchedWindow.Activate();
}
}
Expand Down
5 changes: 5 additions & 0 deletions Bookmarkly.App/Bookmarkly.App.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<PublishProfile>win-$(Platform).pubxml</PublishProfile>
<UseWinUI>true</UseWinUI>
<EnableMsixTooling>true</EnableMsixTooling>
<EnableWindowsTargeting>true</EnableWindowsTargeting>
</PropertyGroup>

<ItemGroup>
Expand Down Expand Up @@ -36,6 +37,10 @@
<PackageReference Include="Microsoft.WindowsAppSDK" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Bookmarkly.Transcription.Server\Bookmarkly.Transcription.Server.csproj" />
</ItemGroup>

<!--
Defining the "HasPackageAndPublishMenuAddedByProject" property here allows the Solution
Explorer "Package and Publish" context menu entry to be enabled for this project even if
Expand Down
14 changes: 13 additions & 1 deletion Bookmarkly.App/Package.appxmanifest
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
xmlns="http://schemas.microsoft.com/appx/manifest/foundation/windows10"
xmlns:mp="http://schemas.microsoft.com/appx/2014/phone/manifest"
xmlns:uap="http://schemas.microsoft.com/appx/manifest/uap/windows10"
xmlns:uap5="http://schemas.microsoft.com/appx/manifest/uap/windows10/5"
xmlns:rescap="http://schemas.microsoft.com/appx/manifest/foundation/windows10/restrictedcapabilities"
IgnorableNamespaces="uap rescap">
IgnorableNamespaces="uap uap5 rescap">

<Identity
Name="f2f093fb-2af3-4a7d-9100-613ffeb5769c"
Expand Down Expand Up @@ -42,6 +43,17 @@
<uap:DefaultTile Wide310x150Logo="Assets\Wide310x150Logo.png" />
<uap:SplashScreen Image="Assets\SplashScreen.png" />
</uap:VisualElements>
<Extensions>
<!--
Declares an out-of-process server for the activatable class
"Bookmarkly.Transcription.TranscriptionService", hosted by
Bookmarkly.Transcription.Server.exe as a single instance, running full trust
with the package identity.
NOTE(review): the manifest schema documents the
"windows.activatableClass.outOfProcessServer" category as a package-level
<Extension> in the foundation namespace (child of <Package>/<Extensions>), with only
the IdentityType and RunFullTrust attributes coming from uap5. Here the elements are
uap5-prefixed and nested under <Application>. Confirm this validates and activates
as intended before merging.
-->
<uap5:Extension Category="windows.activatableClass.outOfProcessServer">
<uap5:OutOfProcessServer ServerName="Bookmarkly.Transcription.Server"
uap5:IdentityType="activateAsPackage"
uap5:RunFullTrust="true">
<uap5:Path>Bookmarkly.Transcription.Server\Bookmarkly.Transcription.Server.exe</uap5:Path>
<uap5:Instancing>singleInstance</uap5:Instancing>
<uap5:ActivatableClass ActivatableClassId="Bookmarkly.Transcription.TranscriptionService" />
</uap5:OutOfProcessServer>
</uap5:Extension>
</Extensions>
</Application>
</Applications>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<!--
Project holding the transcription service contract (see ITranscriptionService.idl).
Types live in the Bookmarkly.Transcription root namespace.
-->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0-windows10.0.26100.0</TargetFramework>
<RootNamespace>Bookmarkly.Transcription</RootNamespace>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<!-- Turns on C#/WinRT and marks this project as a WinRT component. -->
<!-- NOTE(review): no Microsoft.Windows.CsWinRT package reference is visible here; confirm it is supplied elsewhere. -->
<CsWinRTEnabled>true</CsWinRTEnabled>
<CsWinRTComponent>true</CsWinRTComponent>
<!-- Allows restoring/building this Windows-targeted project on non-Windows hosts. -->
<EnableWindowsTargeting>true</EnableWindowsTargeting>
</PropertyGroup>
<ItemGroup>
<!-- No Version attribute: presumably resolved by central package management; TODO confirm. -->
<PackageReference Include="Microsoft.Windows.SDK.BuildTools" />
</ItemGroup>
</Project>
15 changes: 15 additions & 0 deletions Bookmarkly.Transcription.Abstractions/ITranscriptionService.idl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Contract for the transcription service. The runtime class declared here has the full name
// Bookmarkly.Transcription.TranscriptionService.
namespace Bookmarkly.Transcription
{
// Runtime class that implements ITranscriptionService and can be default-constructed.
[default_interface]
runtimeclass TranscriptionService : ITranscriptionService
{
// Parameterless constructor.
TranscriptionService();
}

// Asynchronous transcription operations.
interface ITranscriptionService
{
// Transcribes the given audio file; the operation completes with a string result.
Windows.Foundation.IAsyncOperation<String> TranscribeAsync(Windows.Storage.StorageFile audioFile);
// Same as TranscribeAsync, with an additional language code argument.
// NOTE(review): the expected format of languageCode is not specified here.
Windows.Foundation.IAsyncOperation<String> TranscribeWithLanguageAsync(Windows.Storage.StorageFile audioFile, String languageCode);
// Completes with a vector of strings describing the supported languages.
Windows.Foundation.IAsyncOperation<Windows.Foundation.Collections.IVector<String> > GetSupportedLanguagesAsync();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<!--
Executable project for the transcription server process. References the contract
project (Abstractions) and the implementation project (Bookmarkly.Transcription).
-->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Built as a Windows GUI executable (WinExe), i.e. not a console-subsystem app. -->
<OutputType>WinExe</OutputType>
<TargetFramework>net10.0-windows10.0.26100.0</TargetFramework>
<RootNamespace>Bookmarkly.Transcription.Server</RootNamespace>
<Platforms>x86;x64;ARM64</Platforms>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<CsWinRTEnabled>true</CsWinRTEnabled>
<!-- Allows restoring/building this Windows-targeted project on non-Windows hosts. -->
<EnableWindowsTargeting>true</EnableWindowsTargeting>
</PropertyGroup>
<ItemGroup>
<!-- No Version attribute: presumably resolved by central package management; TODO confirm. -->
<PackageReference Include="Microsoft.Windows.SDK.BuildTools" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Bookmarkly.Transcription.Abstractions\Bookmarkly.Transcription.Abstractions.csproj" />
<ProjectReference Include="..\Bookmarkly.Transcription\Bookmarkly.Transcription.csproj" />
</ItemGroup>
</Project>
78 changes: 78 additions & 0 deletions Bookmarkly.Transcription.Server/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
using System;
using System.Runtime.InteropServices;
using System.Threading;

namespace Bookmarkly.Transcription.Server;

/// <summary>
/// Host process for the Bookmarkly transcription server. <see cref="Main"/> blocks until a
/// shutdown is signalled by process exit, Ctrl+C, or the last tracked instance being released.
/// </summary>
class Program
{
    /// <summary>
    /// Signalled when the server should stop. Created eagerly so that every caller can use it
    /// without a null check, regardless of whether <see cref="Main"/> has started yet.
    /// </summary>
    private static readonly ManualResetEvent _serverExitEvent = new(false);

    /// <summary>Number of live instances reported through <see cref="AddRef"/>/<see cref="Release"/>.</summary>
    private static int _refCount;

    /// <summary>Guards <see cref="_refCount"/>.</summary>
    private static readonly object _lock = new();

    /// <summary>
    /// Process entry point: hooks the shutdown notifications and then waits for the exit signal.
    /// </summary>
    /// <param name="args">Command-line arguments; currently unused.</param>
    /// <returns>0 after a normal shutdown, 1 if an exception escaped the server loop.</returns>
    [STAThread]
    static int Main(string[] args)
    {
        try
        {
            // Make sure a process exit or Ctrl+C releases the wait below.
            AppDomain.CurrentDomain.ProcessExit += OnProcessExit;
            Console.CancelKeyPress += OnCancelKeyPress;

            Console.WriteLine("Bookmarkly Transcription Server started.");

            // In a real implementation, this would register with COM using
            // Windows.ApplicationModel.Core APIs and CoRegisterClassObject.
            // For packaged apps, the system handles activation through the manifest.
            // The server just needs to stay alive while there are active instances.

            // Keep server alive
            _serverExitEvent.WaitOne();

            Console.WriteLine("Bookmarkly Transcription Server stopped.");
            return 0;
        }
        catch (Exception ex)
        {
            // Report failures on stderr and include the full exception (type + stack trace),
            // not just the message, so the cause can be diagnosed.
            Console.Error.WriteLine($"Server error: {ex}");
            return 1;
        }
    }

    /// <summary>Releases the wait in <see cref="Main"/> when the process is exiting.</summary>
    private static void OnProcessExit(object? sender, EventArgs e)
    {
        _serverExitEvent.Set();
    }

    /// <summary>
    /// Handles Ctrl+C: cancels the default immediate termination and requests an orderly
    /// shutdown through the exit event instead.
    /// </summary>
    private static void OnCancelKeyPress(object? sender, ConsoleCancelEventArgs e)
    {
        e.Cancel = true;
        _serverExitEvent.Set();
    }

    /// <summary>Called when an instance is created; increments the live-instance count.</summary>
    internal static void AddRef()
    {
        lock (_lock)
        {
            _refCount++;
        }
    }

    /// <summary>
    /// Called when an instance is released. Signals the server to exit once no instances remain.
    /// </summary>
    /// <remarks>
    /// The count never drops below zero. An unbalanced call still signals shutdown (as before),
    /// but it no longer leaves a negative count that would make later AddRef/Release pairs
    /// report "no instances" while one is still alive.
    /// </remarks>
    internal static void Release()
    {
        lock (_lock)
        {
            if (_refCount > 0)
            {
                _refCount--;
            }

            if (_refCount == 0)
            {
                // No more instances, signal server to exit
                _serverExitEvent.Set();
            }
        }
    }
}
119 changes: 119 additions & 0 deletions Bookmarkly.Transcription/AudioProcessor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using NAudio.Wave;

namespace Bookmarkly.Transcription;

/// <summary>
/// Prepares audio for Whisper: decodes and resamples files to 16 kHz mono, computes a
/// simplified log-energy spectrogram with 80 bins, and splits sample buffers into
/// 30-second chunks.
/// </summary>
public class AudioProcessor
{
    private const int SampleRate = 16000;
    private const int MelBins = 80;
    private const int WindowSize = 400; // 25ms at 16kHz
    private const int HopSize = 160; // 10ms at 16kHz
    private const int ChunkSamples = SampleRate * 30; // 30 seconds
    private const int ReadBufferSize = 8192; // samples read from the resampler per call

    // Hann coefficients depend only on the window size, so compute them once
    // instead of once per frame.
    private static readonly float[] HannWindow = CreateHannWindow(WindowSize);

    /// <summary>
    /// Loads an audio file and resamples it to 16 kHz mono floating-point samples.
    /// The work runs on the thread pool.
    /// </summary>
    /// <param name="filePath">Path of the audio file to decode.</param>
    /// <returns>The decoded samples at 16 kHz, single channel.</returns>
    /// <exception cref="InvalidOperationException">
    /// The file could not be opened, decoded or resampled; the original error is the inner exception.
    /// </exception>
    public async Task<float[]> LoadAudioAsync(string filePath)
    {
        return await Task.Run(() =>
        {
            try
            {
                using var reader = new AudioFileReader(filePath);

                // Resample to 16kHz mono. 'using' guarantees the resampler is released
                // even when reading fails part-way through.
                using var resampler = new MediaFoundationResampler(reader, new WaveFormat(SampleRate, 1))
                {
                    ResamplerQuality = 60
                };

                // The resampler is a byte-oriented IWaveProvider; wrap it so samples
                // can be read as floats.
                var sampleSource = resampler.ToSampleProvider();

                // Estimate capacity based on duration, clamped to a valid list capacity.
                long estimatedSamples = (long)(reader.TotalTime.TotalSeconds * SampleRate);
                int capacity = (int)Math.Clamp(estimatedSamples, 0L, Array.MaxLength);
                var samples = new List<float>(capacity);

                var buffer = new float[ReadBufferSize];
                int read;

                while ((read = sampleSource.Read(buffer, 0, buffer.Length)) > 0)
                {
                    // ArraySegment is an ICollection<T>, so AddRange does a block copy
                    // instead of enumerating element by element.
                    samples.AddRange(new ArraySegment<float>(buffer, 0, read));
                }

                return samples.ToArray();
            }
            catch (Exception ex)
            {
                throw new InvalidOperationException($"Failed to load audio file: {ex.Message}", ex);
            }
        }).ConfigureAwait(false);
    }

    /// <summary>
    /// Converts audio samples to a simplified log-energy spectrogram.
    /// </summary>
    /// <remarks>
    /// This is a simplified version: each Hann-windowed frame is divided into 80 equal
    /// time-domain bands and the log of each band's energy is stored. A production
    /// implementation would use a proper STFT and mel filterbank.
    /// </remarks>
    /// <param name="samples">Audio samples; 400 samples per frame with a hop of 160.</param>
    /// <returns>
    /// An array of shape [80, frames]. Inputs shorter than one full window (400 samples)
    /// produce zero frames.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public float[,] ComputeMelSpectrogram(float[] samples)
    {
        ArgumentNullException.ThrowIfNull(samples);

        // Without this guard the frame-count formula goes negative for very short input
        // (and throws on allocation) or yields a single truncated window.
        if (samples.Length < WindowSize)
        {
            return new float[MelBins, 0];
        }

        int numFrames = (samples.Length - WindowSize) / HopSize + 1;
        var melSpectrogram = new float[MelBins, numFrames];
        const int binSize = WindowSize / MelBins;

        // Scratch buffer reused for every frame.
        Span<float> window = stackalloc float[WindowSize];

        for (int frame = 0; frame < numFrames; frame++)
        {
            ReadOnlySpan<float> source = samples.AsSpan(frame * HopSize, WindowSize);

            // Apply Hann window
            for (int i = 0; i < WindowSize; i++)
            {
                window[i] = source[i] * HannWindow[i];
            }

            // Simplified mel computation - in production would use FFT + mel filterbank
            for (int mel = 0; mel < MelBins; mel++)
            {
                float energy = 0;
                foreach (float value in window.Slice(mel * binSize, binSize))
                {
                    energy += value * value;
                }

                // Floor the energy so silence does not produce -infinity.
                melSpectrogram[mel, frame] = MathF.Log(MathF.Max(energy, 1e-10f));
            }
        }

        return melSpectrogram;
    }

    /// <summary>
    /// Splits audio into 30-second chunks for processing.
    /// </summary>
    /// <param name="samples">Samples at 16 kHz.</param>
    /// <returns>
    /// Consecutive copies of at most 480,000 samples each; the last chunk may be shorter.
    /// Empty input yields an empty list.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public List<float[]> SplitIntoChunks(float[] samples)
    {
        ArgumentNullException.ThrowIfNull(samples);

        var chunks = new List<float[]>(samples.Length / ChunkSamples + 1);

        // Advance by the size actually copied so the offset can never pass
        // samples.Length (and therefore never overflow).
        int offset = 0;
        while (offset < samples.Length)
        {
            int chunkSize = Math.Min(ChunkSamples, samples.Length - offset);
            chunks.Add(samples.AsSpan(offset, chunkSize).ToArray());
            offset += chunkSize;
        }

        return chunks;
    }

    /// <summary>Builds the symmetric Hann window coefficients for the given length.</summary>
    private static float[] CreateHannWindow(int length)
    {
        var coefficients = new float[length];
        for (int i = 0; i < length; i++)
        {
            coefficients[i] = 0.5f * (1 - MathF.Cos(2 * MathF.PI * i / (length - 1)));
        }

        return coefficients;
    }
}
17 changes: 17 additions & 0 deletions Bookmarkly.Transcription/Bookmarkly.Transcription.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<!--
Transcription implementation library (contains AudioProcessor). Depends on ONNX Runtime
(including the DirectML package) and NAudio, and references the contract project.
-->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0-windows10.0.26100.0</TargetFramework>
<RootNamespace>Bookmarkly.Transcription</RootNamespace>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<!-- Allows restoring/building this Windows-targeted project on non-Windows hosts. -->
<EnableWindowsTargeting>true</EnableWindowsTargeting>
</PropertyGroup>
<ItemGroup>
<!-- No Version attributes: presumably resolved by central package management; TODO confirm. -->
<!-- NOTE(review): both the base and the DirectML ONNX Runtime packages are referenced; confirm they are meant to be combined. -->
<PackageReference Include="Microsoft.ML.OnnxRuntime" />
<PackageReference Include="Microsoft.ML.OnnxRuntime.DirectML" />
<PackageReference Include="Microsoft.Windows.SDK.BuildTools" />
<PackageReference Include="NAudio" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Bookmarkly.Transcription.Abstractions\Bookmarkly.Transcription.Abstractions.csproj" />
</ItemGroup>
</Project>
Loading