diff --git a/FeedCord/docs/reference.md b/FeedCord/docs/reference.md index 2557e2b..9a4e07c 100644 --- a/FeedCord/docs/reference.md +++ b/FeedCord/docs/reference.md @@ -243,6 +243,70 @@ Luckily you can simply do this to do a filter for all feeds - set `Url` equal to } ``` +### Initial Fetch Configuration + +When setting up a new feed, you might want to backfill recent posts instead of starting from scratch. The `InitialFetchCount` and `OnwardDate` properties allow you to control this behavior. + +**InitialFetchCount**: Specifies how many of the most recent posts to fetch when the feed worker first initializes. By default, this is set to 0, meaning no initial fetch occurs and the feed will only post new items going forward. + +**OnwardDate**: An optional date filter that works in conjunction with `InitialFetchCount`. When specified, only posts published on or after this date will be included in the initial fetch. + +Here's an example of backfilling the last 10 posts: + +``` +{ + "Instances": [ + { + "Id": "Tech News", + "YoutubeUrls": [ + "" + ], + "RssUrls": [ + "https://example.com/feed.rss" + ], + "DiscordWebhookUrl": "https://discord.com/api/webhooks/...", + "RssCheckIntervalMinutes": 15, + "Color": 8411391, + "DescriptionLimit": 500, + "Forum": true, + "MarkdownFormat": false, + "PersistenceOnShutdown": true, + "InitialFetchCount": 10 + } + ], + "ConcurrentRequests": 40 +} +``` + +Here's an example combining both properties to fetch recent posts from a specific date onwards: + +``` +{ + "Instances": [ + { + "Id": "Tech News", + "YoutubeUrls": [ + "" + ], + "RssUrls": [ + "https://example.com/feed.rss" + ], + "DiscordWebhookUrl": "https://discord.com/api/webhooks/...", + "RssCheckIntervalMinutes": 15, + "Color": 8411391, + "DescriptionLimit": 500, + "Forum": true, + "MarkdownFormat": false, + "PersistenceOnShutdown": true, + "InitialFetchCount": 20, + "OnwardDate": "2025-11-01T00:00:00" + } + ], + "ConcurrentRequests": 40 +} +``` + +In this example, FeedCord 
will fetch up to 20 of the most recent posts, but only include those published on or after November 1st, 2025. --- @@ -274,6 +338,8 @@ Luckily you can simply do this to do a filter for all feeds - set `Url` equal to - **ConcurrentRequests**: How many requests FeedCord can have going at once. - **ConcurrentRequests (Inside Instance)**: How many requests the instance itself can have going at once. - **PostFilters**: A collection of phrases/words that are used to filter out RSS Items (filters the Title & Content) +- **InitialFetchCount**: The number of most recent posts to fetch when the feed worker initializes. Default is 0 (no initial fetch). Useful for backfilling posts when starting a new feed. +- **OnwardDate**: An optional date filter that works with InitialFetchCount. Only posts published on or after this date will be fetched during initial fetch. Format: ISO 8601 date string (e.g., "2025-11-19T00:00:00"). --- diff --git a/FeedCord/src/Common/Config.cs b/FeedCord/src/Common/Config.cs index f2b8df1..d2f090c 100644 --- a/FeedCord/src/Common/Config.cs +++ b/FeedCord/src/Common/Config.cs @@ -39,5 +39,7 @@ public class Config public List? PostFilters { get; set; } public Dictionary? Pings { get; set; } public int ConcurrentRequests { get; set; } = 5; + public int InitialFetchCount { get; set; } = 0; + public DateTime? 
OnwardDate { get; set; } } } diff --git a/FeedCord/src/Infrastructure/Workers/FeedWorker.cs b/FeedCord/src/Infrastructure/Workers/FeedWorker.cs index e7462bd..8fb2be1 100644 --- a/FeedCord/src/Infrastructure/Workers/FeedWorker.cs +++ b/FeedCord/src/Infrastructure/Workers/FeedWorker.cs @@ -1,4 +1,5 @@ -using FeedCord.Common; +using System.Runtime.InteropServices; +using FeedCord.Common; using FeedCord.Core.Interfaces; using FeedCord.Services.Interfaces; using Microsoft.Extensions.Hosting; @@ -17,6 +18,8 @@ public class FeedWorker : BackgroundService private readonly bool _persistent; private readonly string _id; private readonly int _delayTime; + private readonly int _initialFetchCount; + private readonly DateTime? _onwardDate; private bool _isInitialized; @@ -36,6 +39,8 @@ public FeedWorker( _id = config.Id; _isInitialized = false; _persistent = config.PersistenceOnShutdown; + _initialFetchCount = config.InitialFetchCount; + _onwardDate = config.OnwardDate; _logAggregator = logAggregator; logger.LogInformation("{id} Created with check interval {Interval} minutes", @@ -73,14 +78,22 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken) private async Task RunRoutineBackgroundProcessAsync() { + var posts = new List(); + if (!_isInitialized) { _logger.LogInformation("{id}: Initializing Url Checks..", _id); await _feedManager.InitializeUrlsAsync(); _isInitialized = true; + if (_initialFetchCount != 0) + { + _logger.LogInformation("{Id}: Starting Initial Fetch Count {FetchCount}", _id, _initialFetchCount); + posts = await _feedManager.CheckForLastPostsAsync(_initialFetchCount, _onwardDate); + } } - var posts = await _feedManager.CheckForNewPostsAsync(); + var newPosts = await _feedManager.CheckForNewPostsAsync(); + posts.AddRange(newPosts); if (posts.Count > 0) { diff --git a/FeedCord/src/Services/FeedManager.cs b/FeedCord/src/Services/FeedManager.cs index 95325c5..9610b71 100644 --- a/FeedCord/src/Services/FeedManager.cs +++ 
b/FeedCord/src/Services/FeedManager.cs
@@ -60,6 +60,24 @@ public async Task> CheckForNewPostsAsync()
             return allNewPosts.ToList();
         }
+
+        /// <summary>Fetches every tracked feed and returns the most recent posts for the initial backfill, newest first.</summary>
+        /// <param name="maxPostCount">Maximum number of posts to return across all feeds.</param>
+        /// <param name="onwardDate">Optional inclusive lower bound; when set, only posts published on or after it are kept.</param>
+        public async Task> CheckForLastPostsAsync(int maxPostCount, DateTime? onwardDate)
+        {
+            ConcurrentBag allNewPosts = new();
+            // Passing false disables the LastPublishDate filter in CheckSingleFeedAsync for this call.
+            var tasks = _feedStates.Select(async (feed) =>
+                await CheckSingleFeedAsync(feed.Key, feed.Value, allNewPosts, _config.DescriptionLimit, false));
+            await Task.WhenAll(tasks);
+            // Inclusive comparison: the reference docs promise posts published "on or after" OnwardDate.
+            var filteredPosts = allNewPosts
+                .Where(p => !onwardDate.HasValue || p.PublishDate >= onwardDate.Value)
+                .OrderByDescending(p => p.PublishDate).Take(maxPostCount).ToList();
+            _logAggregator.SetNewPostCount(filteredPosts.Count);
+            return filteredPosts;
+        }
         public async Task InitializeUrlsAsync()
         {
             var id = _config.Id;
@@ -185,7 +203,7 @@ private async Task TestUrlAsync(string url)
             return false;
         }
-        private async Task CheckSingleFeedAsync(string url, FeedState feedState, ConcurrentBag newPosts, int trim)
+        private async Task CheckSingleFeedAsync(string url, FeedState feedState, ConcurrentBag newPosts, int trim, bool filterByLastPublishedDate = true)
         {
             List posts;
@@ -207,28 +225,53 @@ await FetchYoutubeAsync(url) :
                 _instancedConcurrentRequests.Release();
             }
-            var freshlyFetched = posts.Where(p => p?.PublishDate > feedState.LastPublishDate).ToList();
+            var fetchedPosts = posts;
+            if (filterByLastPublishedDate)
+            {
+                fetchedPosts = fetchedPosts.Where(p => p?.PublishDate > feedState.LastPublishDate).ToList();
+            }
-            if (freshlyFetched.Any())
+            if (fetchedPosts.Count == 0)
             {
-                feedState.LastPublishDate = freshlyFetched.Max(p => p!.PublishDate);
-                feedState.ErrorCount = 0;
+                _logAggregator.AddLatestUrlPost(url, posts.OrderByDescending(p => p?.PublishDate).FirstOrDefault());
+                return;
+            }
+
+            feedState.LastPublishDate = fetchedPosts.Max(p => p!.PublishDate);
+            feedState.ErrorCount = 0;
-                foreach (var post in freshlyFetched)
+            foreach (var post in fetchedPosts)
+            {
+                if (post is null)
                 {
-                    if (post is null)
+
_logger.LogWarning("Failed to parse a post from {Url}", url); + continue; + } + + //TODO --> Implement Filter checking in to a helper/service & remove from FeedManager + if (_hasFilterEnabled && _config.PostFilters != null) + { + var filter = _config.PostFilters.FirstOrDefault(wf => wf.Url == url); + if (filter != null) { - _logger.LogWarning("Failed to parse a post from {Url}", url); - continue; - } + var filterFound = FilterConfigs.GetFilterSuccess(post, filter.Filters.ToArray()); - //TODO --> Implement Filter checking in to a helper/service & remove from FeedManager - if (_hasFilterEnabled && _config.PostFilters != null) + if (filterFound) + { + newPosts.Add(post); + } + else + { + _logger.LogInformation( + "A new post was omitted because it does not comply to the set filter: {Url}", url); + } + } + else if (_hasAllFilter) { - var filter = _config.PostFilters.FirstOrDefault(wf => wf.Url == url); - if (filter != null) + var allFilter = _config.PostFilters.FirstOrDefault(wf => wf.Url == "all"); + if (allFilter != null) { - var filterFound = FilterConfigs.GetFilterSuccess(post, filter.Filters.ToArray()); + var filterFound = FilterConfigs.GetFilterSuccess(post, allFilter.Filters.ToArray()); if (filterFound) { @@ -240,34 +283,12 @@ await FetchYoutubeAsync(url) : "A new post was omitted because it does not comply to the set filter: {Url}", url); } } - else if (_hasAllFilter) - { - var allFilter = _config.PostFilters.FirstOrDefault(wf => wf.Url == "all"); - if (allFilter != null) - { - var filterFound = FilterConfigs.GetFilterSuccess(post, allFilter.Filters.ToArray()); - - if (filterFound) - { - newPosts.Add(post); - } - else - { - _logger.LogInformation( - "A new post was omitted because it does not comply to the set filter: {Url}", url); - } - } - } - } - else - { - newPosts.Add(post); } } - } - else - { - _logAggregator.AddLatestUrlPost(url, posts.OrderByDescending(p => p?.PublishDate).FirstOrDefault()); + else + { + newPosts.Add(post); + } } } diff --git 
a/FeedCord/src/Services/Interfaces/IFeedManager.cs b/FeedCord/src/Services/Interfaces/IFeedManager.cs index 99ee9fd..bfd381d 100644 --- a/FeedCord/src/Services/Interfaces/IFeedManager.cs +++ b/FeedCord/src/Services/Interfaces/IFeedManager.cs @@ -5,6 +5,7 @@ namespace FeedCord.Services.Interfaces public interface IFeedManager { Task> CheckForNewPostsAsync(); + Task> CheckForLastPostsAsync(int maxPostCount, DateTime? onwardDate); Task InitializeUrlsAsync(); IReadOnlyDictionary GetAllFeedData(); } diff --git a/README.md b/README.md index 04ce774..462c448 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,8 @@ Provided below is a quick guide to get up and running. "Color": 8411391, "DescriptionLimit": 250, "MarkdownFormat": false, + "InitialFetchCount": 20, + "OnwardDate": "2025-11-01T00:00:00", "PersistenceOnShutdown": true } ],