20
20
# Module-level cache of downloaded attachment payloads, keyed by attachment ID;
# consulted by fetch_attachment() when its `cache` flag is set.
fetched_attachments : dict [int , bytes ] = {}
21
21
22
22
23
def hamming_distance_padded(str1, str2):
    """Count the positions at which two strings differ after space-padding.

    The shorter string is right-padded with spaces up to the length of the
    longer one, so strings of unequal length can still be compared position
    by position.

    Args:
        str1: First string to compare.
        str2: Second string to compare.

    Returns:
        int: Number of positions whose characters differ.
    """
    width = max(len(str1), len(str2))
    padded_a, padded_b = str1.ljust(width), str2.ljust(width)

    mismatches = 0
    for left, right in zip(padded_a, padded_b):
        if left != right:
            mismatches += 1
    return mismatches
30
+
31
+
32
def hamming_similarity_score(str1, str2):
    """Return the fraction of positions at which two strings agree.

    The score is ``(max_len - distance) / max_len``, where ``distance`` is the
    space-padded Hamming distance from ``hamming_distance_padded`` and
    ``max_len`` is the length of the longer string.

    Args:
        str1: First string to compare.
        str2: Second string to compare.

    Returns:
        float: Similarity ratio between 0.0 (every position differs) and
        1.0 (no position differs). Two empty strings score 1.0.
    """
    max_len = max(len(str1), len(str2))
    if max_len == 0:
        # Both strings are empty: they are identical, and dividing by
        # max_len below would raise ZeroDivisionError.
        return 1.0

    distance = hamming_distance_padded(str1, str2)
    return (max_len - distance) / max_len
37
+
38
+
23
39
async def fetch_attachment (attachment : discord .Attachment , cache : bool = True ) -> bytes :
24
40
if cache and attachment .id in fetched_attachments :
25
41
logger .debug (f"Fetched attachment from cache: { attachment .id } " )
@@ -55,10 +71,7 @@ async def crosspost_cmp(message: discord.Message, other: discord.Message) -> boo
55
71
)
56
72
57
73
if have_content :
58
- hamming_score = sum (
59
- x != y for x , y in zip (message .content , other .content )
60
- ) / max (len (message .content ), len (other .content ))
61
- similarity_score = min (max (0 , 1 - hamming_score ), 1 )
74
+ similarity_score = hamming_similarity_score (message .content , other .content )
62
75
logger .debug (f"Computed similarity score for content: { similarity_score } " )
63
76
else :
64
77
similarity_score = 0
@@ -115,7 +128,8 @@ def __init__(
115
128
bot : BotT ,
116
129
channel_ids : Collection [int ],
117
130
crosspost_timedelta_threshold : int ,
118
- message_length_threshold : int ,
131
+ same_channel_message_length_threshold : int ,
132
+ cross_channel_message_length_threshold : int ,
119
133
max_tracked_users : int ,
120
134
max_tracked_message_groups_per_user : int ,
121
135
theme_color : int | discord .Color = 0 ,
@@ -127,7 +141,10 @@ def __init__(
127
141
bot (BotT): The bot instance.
128
142
channel_ids (Collection[int]): Collection of channel IDs to monitor.
129
143
crosspost_timedelta_threshold (int): Minimum time difference between messages to not be considered crossposts.
130
- message_length_threshold (int): Minimum length of a text-only message to be considered.
144
+ same_channel_message_length_threshold (int): Minimum length of a text-only message to be considered
145
+ if the messages are in the same channel.
146
+ cross_channel_message_length_threshold (int): Minimum length of a text-only message to be considered
147
+ if the messages are in different channels.
131
148
max_tracked_users (int): Maximum number of users to track.
132
149
max_tracked_message_groups_per_user (int): Maximum number of message
133
150
groups to track per user.
@@ -140,7 +157,12 @@ def __init__(
140
157
self .crosspost_timedelta_threshold = crosspost_timedelta_threshold
141
158
self .max_tracked_users = max_tracked_users
142
159
self .max_tracked_message_groups_per_user = max_tracked_message_groups_per_user
143
- self .message_length_threshold = message_length_threshold
160
+ self .same_channel_message_length_threshold = (
161
+ same_channel_message_length_threshold
162
+ )
163
+ self .cross_channel_message_length_threshold = (
164
+ cross_channel_message_length_threshold
165
+ )
144
166
145
167
@commands .Cog .listener ()
146
168
async def on_message (self , message : discord .Message ):
@@ -157,12 +179,18 @@ async def on_message(self, message: discord.Message):
157
179
or (
158
180
message .content
159
181
and not message .attachments
160
- and len (message .content ) < self .message_length_threshold
182
+ and (
183
+ len (message .content )
184
+ < min (
185
+ self .same_channel_message_length_threshold ,
186
+ self .cross_channel_message_length_threshold ,
187
+ )
188
+ )
161
189
)
162
190
):
163
191
return
164
192
165
- logger .debug (f"Received message from { message .author .name } : { message .jump_url } " )
193
+ logger .debug (f"Received noteworthy message from { message .author .name } : { message .jump_url } " )
166
194
167
195
# Attempt to enforce the cache size limit
168
196
for user_id in list (self .crossposting_cache .keys ()):
@@ -189,6 +217,18 @@ async def on_message(self, message: discord.Message):
189
217
for messages in user_cache ["message_groups" ]:
190
218
for existing_message in messages :
191
219
if (
220
+ message .channel .id == existing_message .channel .id
221
+ and len (message .content )
222
+ < self .same_channel_message_length_threshold
223
+ ) or (
224
+ message .channel .id != existing_message .channel .id
225
+ and len (message .content )
226
+ < self .cross_channel_message_length_threshold
227
+ ):
228
+ # enforce same-channel and cross-channel message length thresholds in order for them to be considered crossposts
229
+ continue
230
+
231
+ elif (
192
232
await crosspost_cmp (message , existing_message )
193
233
and message .created_at .timestamp ()
194
234
- existing_message .created_at .timestamp ()
@@ -320,7 +360,8 @@ async def setup(
320
360
max_tracked_users : int = 10 ,
321
361
max_tracked_message_groups_per_user : int = 10 ,
322
362
crosspost_timedelta_threshold : int = 86400 ,
323
- message_length_threshold : int = 64 ,
363
+ same_channel_message_length_threshold : int = 64 ,
364
+ cross_channel_message_length_threshold : int = 16 ,
324
365
theme_color : int | discord .Color = 0 ,
325
366
):
326
367
"""
@@ -332,15 +373,19 @@ async def setup(
332
373
max_tracked_users (int): Maximum number of users to track.
333
374
max_tracked_message_groups_per_user (int): Maximum number of message groups to track per user.
334
375
crosspost_timedelta_threshold (int): Minimum time difference between messages to not be considered crossposts.
335
- message_length_threshold (int): Minimum length of a text-only message to be considered.
376
+ same_channel_message_length_threshold (int): Minimum length of a text-only message to be considered
377
+ if the messages are in the same channel.
378
+ cross_channel_message_length_threshold (int): Minimum length of a text-only message to be considered
379
+ if the messages are in different channels.
336
380
theme_color (int | discord.Color): Theme color for the bot's responses.
337
381
"""
338
382
await bot .add_cog (
339
383
AntiCrosspostCog (
340
384
bot ,
341
385
channel_ids ,
342
386
crosspost_timedelta_threshold ,
343
- message_length_threshold ,
387
+ same_channel_message_length_threshold ,
388
+ cross_channel_message_length_threshold ,
344
389
max_tracked_users ,
345
390
max_tracked_message_groups_per_user ,
346
391
theme_color ,
0 commit comments