From e63cd67e7b024cfc74e61bd0c643b785dd0c334c Mon Sep 17 00:00:00 2001 From: Jim Parslow Date: Thu, 20 Jun 2024 11:53:17 +0100 Subject: [PATCH] feat: support for newline characters Will now find profanity next to newline characters --- ProfanityFilter.Tests.Unit/ProfanityTests.cs | 46 +++++++++++++++++++ .../ProfanityFilter/ProfanityFilter.cs | 10 ++-- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/ProfanityFilter.Tests.Unit/ProfanityTests.cs b/ProfanityFilter.Tests.Unit/ProfanityTests.cs index 33c48f2..a57b4e9 100644 --- a/ProfanityFilter.Tests.Unit/ProfanityTests.cs +++ b/ProfanityFilter.Tests.Unit/ProfanityTests.cs @@ -390,6 +390,52 @@ public void CensoredStringReturnsStringWithProfanitiesBleepedOut4() Assert.AreEqual(censored, result); } + [TestMethod] + public void CensoredStringReturnsStringWithProfanitiesBleepedOutMultiLine() + { + var filter = new ProfanityFilter(); + filter.AllowList.Add("scunthorpe"); + filter.AllowList.Add("penistone"); + + var censored = filter.CensorString("I fucking live in Scunthorpe and it is a shit place to live. I would much rather live in penistone you great big\ncock\r\nfuck.", '*'); + var result = "I ******* live in Scunthorpe and it is a **** place to live. 
I would much rather live in penistone you great big\n****\r\n****."; + + Assert.AreEqual(censored, result); + } + + [TestMethod] + public void CensoredStringReturnsStringWithProfanitiesBleepedOutMultiLine2() + { + var filter = new ProfanityFilter(); + + var censored = filter.CensorString("2 girls 1 cup, is my favourite twatting\ntwat\r\n video."); + var result = "* ***** * ***, is my favourite ********\n****\r\n video."; + + Assert.AreEqual(censored, result); + } + + [TestMethod] + public void CensoredStringReturnsStringWithProfanitiesBleepedOutMultiLine3() + { + var filter = new ProfanityFilter(); + + var censored = filter.CensorString("Mary had a\r\n little shit lamb who was a little \r\nfucker\r\n."); + var result = "Mary had a\r\n little **** lamb who was a little \r\n******\r\n."; + + Assert.AreEqual(censored, result); + } + + [TestMethod] + public void CensoredStringReturnsStringWithProfanitiesBleepedOutMultiLine4() + { + var filter = new ProfanityFilter(); + + var censored = filter.CensorString("You are a \nstupid little twat\n, and you like to blow your load in an alaskan\r\npipeline."); + var result = "You are a \n****** little ****\n, and you like to **** **** **** in an alaskan\r\npipeline."; + + Assert.AreEqual(censored, result); + } + [TestMethod] public void CensoredStringReturnsStringWithSingleScunthorpe() diff --git a/ProfanityFilter/ProfanityFilter/ProfanityFilter.cs b/ProfanityFilter/ProfanityFilter/ProfanityFilter.cs index 3af0bc6..32913e3 100644 --- a/ProfanityFilter/ProfanityFilter/ProfanityFilter.cs +++ b/ProfanityFilter/ProfanityFilter/ProfanityFilter.cs @@ -121,7 +121,8 @@ public ReadOnlyCollection<string> DetectAllProfanities(string sentence, bool rem sentence = sentence.Replace(".", ""); sentence = sentence.Replace(",", ""); - var words = sentence.Split(' '); + // Split the string on spaces and newlines + var words = sentence.Split(new[] { ' ', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); var postAllowList = 
FilterWordListByAllowList(words); List<string> swearList = new List<string>(); @@ -180,7 +181,8 @@ public string CensorString(string sentence, char censorCharacter, bool ignoreNum noPunctuation = Regex.Replace(noPunctuation, @"[^\w\s]", ""); - var words = noPunctuation.Split(' '); + // Split the string on spaces and newlines + var words = noPunctuation.Split(new[] { ' ', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); var postAllowList = FilterWordListByAllowList(words); var swearList = new List<string>(); @@ -225,7 +227,7 @@ public string CensorString(string sentence, char censorCharacter, bool ignoreNum // Work backwards in string to get to the start of the word. while (startIndex > 0) { - if (toCheck[startIndex - 1] == ' ' || char.IsPunctuation(toCheck[startIndex - 1])) + if (toCheck[startIndex - 1] == ' ' || toCheck[startIndex - 1] == '\n' || toCheck[startIndex - 1] == '\r' || char.IsPunctuation(toCheck[startIndex - 1])) { break; } @@ -236,7 +238,7 @@ public string CensorString(string sentence, char censorCharacter, bool ignoreNum // Work forwards to get to the end of the word. while (endIndex < toCheck.Length) { - if (toCheck[endIndex] == ' ' || char.IsPunctuation(toCheck[endIndex])) + if (toCheck[endIndex] == ' ' || toCheck[endIndex] == '\n' || toCheck[endIndex] == '\r' || char.IsPunctuation(toCheck[endIndex])) { break; }