-
Notifications
You must be signed in to change notification settings - Fork 5
IP cache #53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
IP cache #53
Changes from 57 commits
7d0974f
783f4ba
ddd8228
ee9ba36
0f7b001
0cbc3f6
b97ed33
4562e8c
843cb61
6cf4d8d
99b0e01
fac8c39
ab6fbf4
6bc8997
9762959
e528a45
39addb2
0c5481e
941107c
ef69ce2
6a7e961
0564307
8204a97
0dc3507
f966986
05a1532
4421776
147427e
16824bf
c5fae7f
57d903f
b86acc3
d5f4a23
0213b6e
2fad4a8
42c10eb
3e27d77
288be2d
0905262
a7082bd
54cb126
4ec694c
47ddd3d
68beba2
f765d6d
d470e39
8ba115b
2a95243
ca697d3
129d6a5
e4005b7
fa28f7c
9b2cd9a
89570ad
ca5416a
a111070
b9369c9
df9e7f5
074293e
d302c1e
d254ca6
f623ac4
3e7990c
7217a7a
1a47421
37c6e31
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,3 +39,4 @@ plotly==4.5.0 | |
| pdoc==0.3.2 | ||
| markdown>=3.0 | ||
| kafka-python==2.0.1 | ||
| cachetools | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,3 +34,4 @@ isoweek==1.3.3 | |
| pdoc==0.3.2 | ||
| spark-testing-base | ||
| kafka-python==2.0.1 | ||
| cachetools | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| # Copyright (c) 2020, eQualit.ie inc. | ||
| # All rights reserved. | ||
| # | ||
| # This source code is licensed under the BSD-style license found in the | ||
| # LICENSE file in the root directory of this source tree. | ||
|
|
||
| import os | ||
| import pickle | ||
| import threading | ||
|
|
||
| from cachetools import TTLCache | ||
|
|
||
| from baskerville.util.helpers import get_default_ip_cache_path | ||
| from baskerville.util.singleton_thread_safe import SingletonThreadSafe | ||
|
|
||
|
|
||
| class IPCache(metaclass=SingletonThreadSafe): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 for the ThreadSafe and all the lock usage, cool! |
||
|
|
||
def __init__(self, config, logger):
    """Set up the "passed challenge" and "pending" IP caches.

    Each cache is unpickled from its file inside the default IP cache
    folder when that file exists; otherwise a new ``TTLCache`` is built
    from the size/TTL values found on ``config.engine``. The config values
    are not applied to a cache that was loaded from disk.

    :param config: object exposing ``engine.ip_cache_passed_challenge_size``,
        ``engine.ip_cache_passed_challenge_ttl``,
        ``engine.ip_cache_pending_size`` and ``engine.ip_cache_pending_ttl``.
    :param logger: logger used for the informational messages below.
    """
    super().__init__()

    self.logger = logger
    # Acquired by every method that reads or mutates the two caches.
    self.lock = threading.Lock()

    folder_path = get_default_ip_cache_path()
    # makedirs(exist_ok=True) creates missing parent folders too and does
    # not fail if the folder appears between a check and the creation,
    # unlike an exists()/mkdir() pair.
    os.makedirs(folder_path, exist_ok=True)

    self.full_path_passed_challenge = os.path.join(folder_path, 'ip_cache_passed_challenge.bin')
    if os.path.exists(self.full_path_passed_challenge):
        self.logger.info(f'Loading passed challenge IP cache from file {self.full_path_passed_challenge}...')
        # NOTE(review): pickle.load can execute arbitrary code embedded in
        # the file; this is only safe while the cache folder is writable
        # by trusted processes alone.
        with open(self.full_path_passed_challenge, 'rb') as f:
            self.cache_passed = pickle.load(f)
    else:
        self.cache_passed = TTLCache(
            maxsize=config.engine.ip_cache_passed_challenge_size,
            ttl=config.engine.ip_cache_passed_challenge_ttl)
        self.logger.info('A new instance of passed challege IP cache has been created')

    self.full_path_pending = os.path.join(folder_path, 'ip_cache_pending.bin')
    if os.path.exists(self.full_path_pending):
        self.logger.info(f'Loading pending challenge IP cache from file {self.full_path_pending}...')
        # NOTE(review): same pickle trust caveat as for the passed cache.
        with open(self.full_path_pending, 'rb') as f:
            self.cache_pending = pickle.load(f)
    else:
        self.cache_pending = TTLCache(
            maxsize=config.engine.ip_cache_pending_size,
            ttl=config.engine.ip_cache_pending_ttl)
        self.logger.info('A new instance of pending IP cache has been created')
mkaranasou marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
def update(self, records):
    """Register the records whose IP is in neither cache as pending.

    Every record whose ``'ip'`` is absent from both the passed and the
    pending cache is added to the pending cache with a zeroed failure
    counter. The pending cache is then pickled to
    ``self.full_path_pending``.

    :param records: iterable of mappings that each have an ``'ip'`` key.
    :return: list of the records that were not cached before this call
        (membership is checked before any insertion, so repeated new IPs
        within ``records`` are all returned).
    """
    with self.lock:
        log = self.logger
        passed = self.cache_passed
        pending = self.cache_pending

        log.info('IP cache updating...')
        if len(passed) > 0.98 * passed.maxsize:
            log.warning('IP cache passed challenge is 98% full. ')
        if len(pending) > 0.98 * pending.maxsize:
            log.warning('IP cache pending challenge is 98% full. ')

        # Filter first, insert afterwards: insertion must not influence
        # the membership test of later records.
        result = [
            rec for rec in records
            if not (rec['ip'] in passed or rec['ip'] in pending)
        ]
        for rec in result:
            pending[rec['ip']] = {'fails': 0}

        # Persist the pending cache on every update.
        with open(self.full_path_pending, 'wb') as out:
            pickle.dump(pending, out)

        log.info(f'IP cache pending: {len(self.cache_pending)}, {len(result)} added')

        return result
|
|
||
def ip_failed_challenge(self, ip):
    """Record one more failed challenge for a pending IP.

    :param ip: key of the entry in the pending cache.
    :return: the updated number of failures for ``ip``, or ``0`` when the
        IP is not pending (or its entry cannot be read/updated).
    """
    with self.lock:
        if ip not in self.cache_pending:
            return 0

        try:
            value = self.cache_pending[ip]
            value['fails'] += 1
            # Store the entry back under the IP itself. The previous code
            # used the literal key 'ip', which added a bogus entry to the
            # cache on every failure.
            # NOTE(review): with a cachetools TTLCache this re-assignment
            # presumably also restarts the entry's TTL - confirm intended.
            self.cache_pending[ip] = value
            return value['fails']

        except KeyError as er:
            # The entry vanished after the membership check, or it has no
            # 'fails' counter. Report "no failures" like the not-pending
            # case instead of implicitly returning None.
            self.logger.info(f'IP cache key error {er}')
            return 0
|
|
||
def ip_passed_challenge(self, ip):
    """Move a pending IP into the passed-challenge cache.

    Nothing happens when the IP is already in the passed cache or is not
    in the pending cache. Otherwise its entry is copied to the passed
    cache, removed from the pending cache, and the passed cache is
    pickled to ``self.full_path_passed_challenge``.

    :param ip: key of the entry to promote.
    :return: ``True`` if the IP was moved, ``False`` otherwise.
    """
    with self.lock:
        passed = self.cache_passed
        pending = self.cache_pending

        # Already promoted, or never registered as pending: nothing to do.
        if ip in passed or ip not in pending:
            return False

        entry = pending[ip]
        passed[ip] = entry
        del pending[ip]
        self.logger.info(f'IP {ip} passed challenge. Total IP in cache_passed: {len(self.cache_passed)}')

        # Only the passed cache is written to disk here.
        with open(self.full_path_passed_challenge, 'wb') as out:
            pickle.dump(passed, out)
        self.logger.info(f'IP cache passed: {len(self.cache_passed)}, 1 added')
        return True
|
|
||
def ip_banned(self, ip):
    """Drop a banned IP from the pending cache.

    A missing entry is not an error: the ``KeyError`` is logged at info
    level and swallowed.

    :param ip: key of the entry to remove from the pending cache.
    :return: ``None``.
    """
    with self.lock:
        pending = self.cache_pending
        try:
            del pending[ip]
        except KeyError as er:
            self.logger.info(f'IP cache key error {er}')
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have you tested the performance of update? I think we could consider having a separate table for the banjax bans, since they will have far fewer rows than request sets.
Also, do you use SQL strings because of better performance?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.