Skip to content

Commit

Permalink
chore: add and improve comments
Browse files Browse the repository at this point in the history
  • Loading branch information
dclipca committed Feb 24, 2025
1 parent 92e2299 commit 77c55c1
Show file tree
Hide file tree
Showing 16 changed files with 505 additions and 198 deletions.
12 changes: 9 additions & 3 deletions SpongeEngine.SubtitleSharp/Parsers/ISubtitleParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,24 @@
namespace SpongeEngine.SubtitleSharp.Parsers
{
/// <summary>
/// Defines methods required for a subtitle parser.
/// </summary>
public interface ISubtitleParser
{
/// <summary>
/// Parses a subtitle stream using the specified encoding.
/// </summary>
/// <param name="stream">The stream containing subtitle data.</param>
/// <param name="encoding">The character encoding used to read the stream.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects parsed from the stream.</returns>
List<SubtitleItem> ParseStream(Stream stream, Encoding encoding);

/// <summary>
/// Parses a subtitle stream using the provided parser options.
/// </summary>
/// <param name="stream">The stream containing subtitle data.</param>
/// <param name="options">Options to control parsing (encoding and timecode mode).</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects parsed from the stream.</returns>
List<SubtitleItem> ParseStream(Stream stream, SubtitleParserOptions options);
}
}
54 changes: 47 additions & 7 deletions SpongeEngine.SubtitleSharp/Parsers/SrtParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,47 @@

namespace SpongeEngine.SubtitleSharp.Parsers
{
/// <summary>
/// Implements parsing for SubRip (SRT) subtitle files.
/// </summary>
/// <remarks>
/// An SRT file typically has the following structure:
/// <code>
/// 1
/// 00:18:03,875 --> 00:18:04,231
/// Oh?
///
/// 2
/// 00:18:05,194 --> 00:18:05,905
/// What was that?
/// </code>
/// </remarks>
public class SrtParser : ISubtitleParser
{
// Separators accepted between the start and end timecodes of a timecode line:
// "-->" plus the looser "- >" and "->" spellings. Passed to string.Split in
// TryParseTimecodeLine.
private static readonly string[] _timecodeDelimiters = { "-->", "- >", "->" };

/// <summary>
/// Initializes a new instance of the <see cref="SrtParser"/> class.
/// </summary>
public SrtParser() { }

/// <summary>
/// Parses an SRT stream that is decoded with the given encoding.
/// </summary>
/// <remarks>
/// Kept for backward compatibility: forwards to the options-based overload
/// with <see cref="SubtitleTimecodeMode.Required"/>.
/// </remarks>
/// <param name="srtStream">A seekable and readable stream containing SRT data.</param>
/// <param name="encoding">The character encoding used to read the stream.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects parsed from the stream.</returns>
public List<SubtitleItem> ParseStream(Stream srtStream, Encoding encoding)
{
    var requiredTimecodeOptions = new SubtitleParserOptions
    {
        Encoding = encoding,
        TimecodeMode = SubtitleTimecodeMode.Required,
    };

    return ParseStream(srtStream, requiredTimecodeOptions);
}

/// <summary>
/// Parses an SRT stream using the provided parser options.
/// </summary>
/// <param name="srtStream">A seekable and readable stream containing SRT data.</param>
/// <param name="options">Options specifying encoding and timecode mode.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects extracted from the stream.</returns>
/// <exception cref="ArgumentException">Thrown if the stream is not readable/seekable or if a subtitle block is invalid.</exception>
public List<SubtitleItem> ParseStream(Stream srtStream, SubtitleParserOptions options)
{
if (!srtStream.CanRead || !srtStream.CanSeek)
Expand All @@ -40,7 +69,7 @@ public List<SubtitleItem> ParseStream(Stream srtStream, SubtitleParserOptions op
bool timecodeFound = false;
foreach (string line in lines)
{
Console.WriteLine($"[DEBUG] Parsing line: {line}");
// Debug logging can be enabled here if needed.
if (!timecodeFound)
{
int startTc, endTc;
Expand All @@ -56,7 +85,6 @@ public List<SubtitleItem> ParseStream(Stream srtStream, SubtitleParserOptions op
}
else
{
Console.WriteLine($"[DEBUG] Timecode parsing failed for line: {line}");
throw new ArgumentException($"Invalid timecode in line: {line}");
}
}
Expand Down Expand Up @@ -87,6 +115,11 @@ public List<SubtitleItem> ParseStream(Stream srtStream, SubtitleParserOptions op
return items;
}

/// <summary>
/// Splits the SRT file into individual subtitle blocks using blank lines as delimiters.
/// </summary>
/// <param name="reader">A <see cref="TextReader"/> for the SRT file.</param>
/// <returns>An enumerable sequence of subtitle block strings.</returns>
private IEnumerable<string> GetSrtSubTitleParts(TextReader reader)
{
string line;
Expand All @@ -109,11 +142,16 @@ private IEnumerable<string> GetSrtSubTitleParts(TextReader reader)
yield return stringBuilder.ToString();
}

/// <summary>
/// Attempts to parse a timecode line in an SRT block into start and end timecodes.
/// </summary>
/// <param name="line">A line expected to contain two timecodes separated by a delimiter.</param>
/// <param name="startTc">Output start timecode (in milliseconds) if parsing succeeds; otherwise -1.</param>
/// <param name="endTc">Output end timecode (in milliseconds) if parsing succeeds; otherwise -1.</param>
/// <returns><c>true</c> if parsing is successful; otherwise, <c>false</c>.</returns>
public static bool TryParseTimecodeLine(string line, out int startTc, out int endTc)
{
string[] parts = line.Split(_timecodeDelimiters, StringSplitOptions.None);
Console.WriteLine($"Line: {line}");
Console.WriteLine($"Parts: {string.Join(" | ", parts)}");
if (parts.Length != 2)
{
startTc = -1;
Expand All @@ -122,10 +160,14 @@ public static bool TryParseTimecodeLine(string line, out int startTc, out int en
}
startTc = ParseSrtTimecode(parts[0].Trim());
endTc = ParseSrtTimecode(parts[1].Trim());
Console.WriteLine($"Start Timecode: {startTc}, End Timecode: {endTc}");
return startTc != -1 && endTc != -1;
}

/// <summary>
/// Parses an SRT timecode string into its equivalent value in milliseconds.
/// </summary>
/// <param name="s">An SRT timecode string in the format hh:mm:ss,fff.</param>
/// <returns>The timecode in milliseconds, or -1 if parsing fails.</returns>
public static int ParseSrtTimecode(string s)
{
Match match = Regex.Match(s, @"^(\d{2}):(\d{2}):(\d{2}),(\d{3})$");
Expand All @@ -135,12 +177,10 @@ public static int ParseSrtTimecode(string s)
int minutes = int.Parse(match.Groups[2].Value);
int seconds = int.Parse(match.Groups[3].Value);
int milliseconds = int.Parse(match.Groups[4].Value);
Console.WriteLine($"Parsed timecode: {hours}:{minutes}:{seconds},{milliseconds}");
return (int)(new TimeSpan(hours, minutes, seconds).TotalMilliseconds + milliseconds);
}
else
{
Console.WriteLine($"Failed to parse timecode: {s}");
return -1;
}
}
Expand Down
28 changes: 25 additions & 3 deletions SpongeEngine.SubtitleSharp/Parsers/SsaParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,32 @@
namespace SpongeEngine.SubtitleSharp.Parsers
{
/// <summary>
/// Implements parsing for the SubStation Alpha (SSA) subtitle format.
///
/// The SSA format is a structured subtitle format that includes sections such as [Script Info] and [Events].
/// This parser focuses on extracting dialogue entries from the [Events] section.
/// </summary>
public class SsaParser : ISubtitleParser
{
/// <summary>
/// Parses an SSA stream that is decoded with the given encoding.
/// </summary>
/// <remarks>
/// Kept for backward compatibility: forwards to the options-based overload
/// with <see cref="SubtitleTimecodeMode.Required"/>.
/// </remarks>
/// <param name="ssaStream">A seekable and readable stream containing SSA subtitle data.</param>
/// <param name="encoding">The character encoding used to read the stream.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects parsed from the stream.</returns>
public List<SubtitleItem> ParseStream(Stream ssaStream, Encoding encoding)
{
    var requiredTimecodeOptions = new SubtitleParserOptions
    {
        Encoding = encoding,
        TimecodeMode = SubtitleTimecodeMode.Required,
    };

    return ParseStream(ssaStream, requiredTimecodeOptions);
}

/// <summary>
/// Parses an SSA stream using the provided parser options.
/// </summary>
/// <param name="ssaStream">A seekable and readable stream containing SSA subtitle data.</param>
/// <param name="options">Parser options including encoding and timecode mode.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects extracted from the stream.</returns>
/// <exception cref="ArgumentException">Thrown if the stream is not readable/seekable or if the format is invalid.</exception>
public List<SubtitleItem> ParseStream(Stream ssaStream, SubtitleParserOptions options)
{
if (!ssaStream.CanRead || !ssaStream.CanSeek)
Expand Down Expand Up @@ -74,6 +90,7 @@ public List<SubtitleItem> ParseStream(Stream ssaStream, SubtitleParserOptions op
if (!string.IsNullOrEmpty(textLine))
{
List<string> lines;
// Choose splitting strategy based on wrap style.
switch (wrapStyle)
{
case SsaWrapStyle.Smart:
Expand Down Expand Up @@ -122,11 +139,16 @@ public List<SubtitleItem> ParseStream(Stream ssaStream, SubtitleParserOptions op
}
else
{
string message = string.Format("We reached line '{0}' with line number #{1} without finding the Event section ({2})", line, lineNumber, SsaFormatConstants.EVENT_LINE);
string message = string.Format("Reached end of header at line '{0}' (line #{1}) without finding the Event section ({2})", line, lineNumber, SsaFormatConstants.EVENT_LINE);
throw new ArgumentException(message);
}
}

/// <summary>
/// Parses an SSA timecode string into its equivalent value in milliseconds.
/// </summary>
/// <param name="s">The SSA timecode string to parse.</param>
/// <returns>The timecode in milliseconds, or -1 if parsing fails.</returns>
private int ParseSsaTimecode(string s)
{
if (TimeSpan.TryParse(s, out TimeSpan result))
Expand All @@ -139,4 +161,4 @@ private int ParseSsaTimecode(string s)
}
}
}
}
}
54 changes: 52 additions & 2 deletions SpongeEngine.SubtitleSharp/Parsers/SubtitleParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@

namespace SpongeEngine.SubtitleSharp.Parsers
{
/// <summary>
/// Provides functionality for parsing subtitles from various formats.
///
/// This class selects the appropriate subtitle parser based on file extension or preferred format
/// and delegates parsing to the underlying format-specific parser.
/// </summary>
public class SubtitleParser
{
private readonly Dictionary<SubtitlesFormat, ISubtitleParser> _subFormatToParser = new Dictionary<SubtitlesFormat, ISubtitleParser>
Expand All @@ -12,11 +18,18 @@ public class SubtitleParser
{ SubtitlesFormat.WebVttFormat, new VttParser() },
};

/// <summary>
/// Initializes a new instance of the <see cref="SubtitleParser"/> class.
/// </summary>
/// <remarks>
/// The body is empty: the format-to-parser map <c>_subFormatToParser</c> is
/// populated by its field initializer.
/// </remarks>
public SubtitleParser() { }

/// <summary>
/// Determines the most likely subtitle format based on the file extension.
/// </summary>
/// <param name="fileName">The subtitle file name.</param>
/// <returns>
/// A <see cref="SubtitlesFormat"/> that best matches the file extension, or <c>null</c> if no match is found.
/// </returns>
public SubtitlesFormat GetMostLikelyFormat(string fileName)
{
string extension = Path.GetExtension(fileName);
Expand All @@ -33,6 +46,14 @@ public SubtitlesFormat GetMostLikelyFormat(string fileName)
return null;

Check warning on line 46 in SpongeEngine.SubtitleSharp/Parsers/SubtitleParser.cs

View workflow job for this annotation

GitHub Actions / test

Possible null reference return.
}

/// <summary>
/// Parses subtitle content provided as a string.
/// </summary>
/// <param name="subtitleContent">The subtitle content.</param>
/// <param name="encoding">The text encoding (defaults to UTF-8 if null).</param>
/// <param name="preferredFormat">An optional preferred subtitle format.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects extracted from the content.</returns>
/// <exception cref="ArgumentException">Thrown if the subtitle content is null or empty.</exception>
public List<SubtitleItem> ParseText(string subtitleContent, Encoding? encoding = null, SubtitlesFormat? preferredFormat = null)
{
if (string.IsNullOrWhiteSpace(subtitleContent))
Expand All @@ -43,11 +64,23 @@ public List<SubtitleItem> ParseText(string subtitleContent, Encoding? encoding =
return ParseStream(stream, new SubtitleParserOptions { Encoding = encoding, TimecodeMode = SubtitleTimecodeMode.Required }, preferredFormat);

Check warning on line 64 in SpongeEngine.SubtitleSharp/Parsers/SubtitleParser.cs

View workflow job for this annotation

GitHub Actions / test

Possible null reference argument for parameter 'subFormat' in 'List<SubtitleItem> SubtitleParser.ParseStream(Stream stream, SubtitleParserOptions options, SubtitlesFormat subFormat = null)'.
}

/// <summary>
/// Parses subtitles from a stream with the default settings: UTF-8 text and mandatory timecodes.
/// </summary>
/// <param name="stream">The input stream containing subtitle data.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects parsed from the stream.</returns>
public List<SubtitleItem> ParseStream(Stream stream)
{
    var defaultOptions = new SubtitleParserOptions
    {
        Encoding = Encoding.UTF8,
        TimecodeMode = SubtitleTimecodeMode.Required,
    };

    return ParseStream(stream, defaultOptions);
}

/// <summary>
/// Parses subtitles from a stream using the specified options and an optional preferred format.
/// </summary>
/// <param name="stream">The input stream containing subtitle data.</param>
/// <param name="options">Parser options including encoding and timecode mode.</param>
/// <param name="subFormat">An optional preferred subtitle format to prioritize during parsing.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects extracted from the stream.</returns>
public List<SubtitleItem> ParseStream(Stream stream, SubtitleParserOptions options, SubtitlesFormat subFormat = null)

Check warning on line 84 in SpongeEngine.SubtitleSharp/Parsers/SubtitleParser.cs

View workflow job for this annotation

GitHub Actions / test

Cannot convert null literal to non-nullable reference type.

Check warning on line 84 in SpongeEngine.SubtitleSharp/Parsers/SubtitleParser.cs

View workflow job for this annotation

GitHub Actions / test

Cannot convert null literal to non-nullable reference type.
{
Dictionary<SubtitlesFormat, ISubtitleParser> dictionary = subFormat != null ?
Expand All @@ -59,6 +92,16 @@ public List<SubtitleItem> ParseStream(Stream stream, SubtitleParserOptions optio
return ParseStream(stream, options, dictionary);
}

/// <summary>
/// Iterates through available subtitle parsers to extract subtitle items from the stream.
/// </summary>
/// <param name="stream">The input stream containing subtitle data.</param>
/// <param name="options">Parser options including encoding and timecode mode.</param>
/// <param name="subFormatDictionary">A dictionary mapping subtitle formats to their respective parsers.</param>
/// <returns>A list of <see cref="SubtitleItem"/> objects parsed from the stream.</returns>
/// <exception cref="ArgumentException">
/// Thrown if the stream is not readable or if no parser can successfully extract subtitle items.
/// </exception>
public List<SubtitleItem> ParseStream(Stream stream, SubtitleParserOptions options, Dictionary<SubtitlesFormat, ISubtitleParser> subFormatDictionary)
{
if (!stream.CanRead)
Expand All @@ -85,7 +128,7 @@ public List<SubtitleItem> ParseStream(Stream stream, SubtitleParserOptions optio
}
catch (Exception)
{
// Continue with next parser.
// Continue with next parser if current fails.
continue;
}
}
Expand All @@ -97,6 +140,13 @@ public List<SubtitleItem> ParseStream(Stream stream, SubtitleParserOptions optio
throw new ArgumentException(message);
}

/// <summary>
/// Logs the first few characters of the stream for diagnostic purposes.
/// </summary>
/// <param name="stream">The input stream.</param>
/// <param name="nbOfCharactersToPrint">The number of characters to log.</param>
/// <param name="encoding">The encoding used to read the stream.</param>
/// <returns>A string containing the initial part of the stream.</returns>
private string LogFirstCharactersOfStream(Stream stream, int nbOfCharactersToPrint, Encoding encoding)
{
string message = "";
Expand Down
13 changes: 13 additions & 0 deletions SpongeEngine.SubtitleSharp/Parsers/SubtitleParserOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,22 @@

namespace SpongeEngine.SubtitleSharp.Parsers
{
/// <summary>
/// Specifies options for parsing subtitle streams.
/// </summary>
/// <remarks>
/// Both properties carry initializers (UTF-8 and
/// <see cref="SubtitleTimecodeMode.Required"/>), so a default-constructed
/// instance is already a complete configuration.
/// </remarks>
public class SubtitleParserOptions
{
/// <summary>
/// Gets or sets the text encoding used to read subtitle streams.
/// Defaults to UTF-8.
/// </summary>
public Encoding Encoding { get; set; } = Encoding.UTF8;

/// <summary>
/// Gets or sets the mode for handling subtitle timecodes.
/// When set to <see cref="SubtitleTimecodeMode.Required"/>, timecodes must be present;
/// when set to <see cref="SubtitleTimecodeMode.Optional"/>, dummy timecodes will be assigned if missing.
/// Defaults to <see cref="SubtitleTimecodeMode.Required"/>.
/// </summary>
public SubtitleTimecodeMode TimecodeMode { get; set; } = SubtitleTimecodeMode.Required;
}
}
13 changes: 11 additions & 2 deletions SpongeEngine.SubtitleSharp/Parsers/SubtitleTimecodeMode.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
namespace SpongeEngine.SubtitleSharp.Parsers
{
    /// <summary>
    /// Specifies how subtitle timecodes should be handled during parsing.
    /// </summary>
    public enum SubtitleTimecodeMode
    {
        /// <summary>
        /// Timecodes must be present in the subtitle data.
        /// </summary>
        Required,

        /// <summary>
        /// Timecodes may be missing; in such cases, dummy timecodes will be assigned.
        /// </summary>
        Optional
    }
}
Loading

0 comments on commit 77c55c1

Please sign in to comment.