Skip to content

upgrade filtercascade to 0.3.0 with salt #13863

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions requirements/prod.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ defusedxml==0.6.0 \
dennis==0.9 \
--hash=sha256:f6487392ac91800c5f0684a99b404b7fd0f72ceb48faeb5a0ce4e2c24fb62d3f \
--hash=sha256:8c942dd5da7d03c65daebc069c5ee5c7f1374ac9b0c8c89c627caa66fe822604
Deprecated==1.2.7 \
--hash=sha256:8b6a5aa50e482d8244a62e5582b96c372e87e3a28e8b49c316e46b95c76a611d \
--hash=sha256:408038ab5fdeca67554e8f6742d1521cd3cd0ee0ff9d47f29318a4f4da31c308
django-aesfield==2.2 \
--hash=sha256:3b5f8816ed2e57f233bbcc10ea6f815aa5c07eeadfc7a27895729a3c99792d7a \
--hash=sha256:b3b09351df1bc272506e47a37889dbfe539d7075ba27470379a4964a1a8325f2
Expand Down Expand Up @@ -214,8 +217,8 @@ feedparser==5.2.1 \
--hash=sha256:ce875495c90ebd74b179855449040003a1beb40cd13d5f037a0654251e260b02 \
--hash=sha256:bd030652c2d08532c034c27fcd7c85868e7fa3cb2b17f230a44a6bbc92519bf9 \
--hash=sha256:cd2485472e41471632ed3029d44033ee420ad0b57111db95c240c9160a85831c
filtercascade==0.2.2 \
--hash=sha256:f8b472eddbe6f9860aff8816815848fb5f520c6e06ca75dafe811eb9824a9ab1
filtercascade==0.3.0 \
--hash=sha256:a16952af79da10b26df4c731ccc3339d5a3edb4b3c5519f96772ecfd80bc3144
# funcsigs is required by mock
funcsigs==1.0.2 \
--hash=sha256:330cc27ccbf7f1e992e69fef78261dc7c6569012cf397db8d3de0234e6c937ca \
Expand Down Expand Up @@ -436,3 +439,5 @@ protobuf==3.11.3 \
--hash=sha256:fdfb6ad138dbbf92b5dbea3576d7c8ba7463173f7d2cb0ca1bd336ec88ddbd80
googleapis-common-protos==1.51.0 \
--hash=sha256:013c91704279119150e44ef770086fdbba158c1f978a6402167d47d5409e226e
wrapt==1.12.1 \
--hash=sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7
8 changes: 4 additions & 4 deletions src/olympia/blocklist/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from olympia.lib.kinto import KintoServer
from olympia.zadmin.models import get_config, set_config

from .mlbf import generate_mlbf, get_mlbf_key_format
from .mlbf import generate_mlbf, MLBF_KEY_FORMAT
from .models import Block
from .utils import KINTO_BUCKET, KINTO_COLLECTION_MLBF

Expand Down Expand Up @@ -36,7 +36,7 @@ def upload_mlbf_to_kinto():
server = KintoServer(
KINTO_BUCKET, KINTO_COLLECTION_MLBF, kinto_sign_off_needed=False)
stats = {}
key_format = get_mlbf_key_format()

# This timestamp represents the point in time when all previous addon
# guid + versions and blocks were used to generate the bloomfilter.
# An add-on version/file from before this time will definitely be accounted
Expand All @@ -45,12 +45,12 @@ def upload_mlbf_to_kinto():
# there may be false positives or false negatives.
# https://github.com/mozilla/addons-server/issues/13695
generation_time = int(time.time() * 1000)
bloomfilter = generate_mlbf(stats, key_format)
bloomfilter = generate_mlbf(stats)
with tempfile.NamedTemporaryFile() as filter_file:
bloomfilter.tofile(filter_file)
filter_file.seek(0)
data = {
'key_format': key_format,
'key_format': MLBF_KEY_FORMAT,
'generation_time': generation_time,
}
attachment = ('filter.bin', filter_file, 'application/octet-stream')
Expand Down
17 changes: 2 additions & 15 deletions src/olympia/blocklist/management/commands/export_blocklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import olympia.core.logger

from olympia.blocklist.mlbf import generate_mlbf, get_mlbf_key_format
from olympia.blocklist.mlbf import generate_mlbf


log = olympia.core.logger.getLogger('z.amo.blocklist')
Expand All @@ -18,12 +18,6 @@ class Command(BaseCommand):

def add_arguments(self, parser):
"""Handle command arguments."""
parser.add_argument(
'--salt',
type=int,
default=None,
dest='salt',
help='Bloom filter salt')
parser.add_argument(
'id',
help="CT baseline identifier",
Expand All @@ -48,19 +42,13 @@ def load_json(self, json_path):

def save_blocklist(self, stats, mlbf, id_):
out_file = os.path.join(settings.TMP_PATH, 'mlbf', id_, 'filter')
meta_file = os.path.join(settings.TMP_PATH, 'mlbf', id_, 'filter.meta')

os.makedirs(os.path.dirname(out_file), exist_ok=True)
with default_storage.open(out_file, 'wb') as mlbf_file:
log.info("Writing to file {}".format(out_file))
mlbf.tofile(mlbf_file)
stats['mlbf_filesize'] = os.stat(out_file).st_size

with default_storage.open(meta_file, 'wb') as mlbf_meta_file:
log.info("Writing to meta file {}".format(meta_file))
mlbf.saveDiffMeta(mlbf_meta_file)
stats['mlbf_metafilesize'] = os.stat(meta_file).st_size

def handle(self, *args, **options):
log.debug('Exporting blocklist to file')
stats = {}
Expand All @@ -72,8 +60,7 @@ def handle(self, *args, **options):
generate_kw['not_blocked'] = (
self.load_json(options.get('addon_guids_input')))

salt = options.get('salt')
mlbf = generate_mlbf(stats, get_mlbf_key_format(salt), **generate_kw)
mlbf = generate_mlbf(stats, **generate_kw)
self.save_blocklist(
stats,
mlbf,
Expand Down
37 changes: 19 additions & 18 deletions src/olympia/blocklist/mlbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
from collections import defaultdict

from filtercascade import FilterCascade
from filtercascade.fileformats import HashAlgorithm

import olympia.core.logger


log = olympia.core.logger.getLogger('z.amo.blocklist')

MLBF_KEY_FORMAT = '{guid}:{version}'


def get_blocked_guids():
from olympia.files.models import File
Expand Down Expand Up @@ -50,38 +54,35 @@ def get_all_guids():
return Version.unfiltered.values_list('addon__guid', 'version')


def hash_filter_inputs(input_list, key_format):
def hash_filter_inputs(input_list):
return [
key_format.format(guid=guid, version=version)
MLBF_KEY_FORMAT.format(guid=guid, version=version)
for (guid, version) in input_list]


def get_mlbf_key_format(salt=None):
salt = salt or secrets.token_hex(16)
return '%s:{guid}:{version}' % salt


def generate_mlbf(stats, key_format, *, blocked=None, not_blocked=None):
"""Based on:
def generate_mlbf(stats, *, blocked=None, not_blocked=None):
"""Originally based on:
https://github.com/mozilla/crlite/blob/master/create_filter_cascade/certs_to_crlite.py
(not so much any longer, apart from the fprs calculation)
"""
blocked = hash_filter_inputs(
blocked or get_blocked_guids(), key_format)
not_blocked = hash_filter_inputs(
not_blocked or get_all_guids(), key_format)

blocked = hash_filter_inputs(blocked or get_blocked_guids())
not_blocked = hash_filter_inputs(not_blocked or get_all_guids())
not_blocked = list(set(not_blocked) - set(blocked))

salt = secrets.token_bytes(16)

stats['mlbf_blocked_count'] = len(blocked)
stats['mlbf_unblocked_count'] = len(not_blocked)

fprs = [len(blocked) / (math.sqrt(2) * len(not_blocked)), 0.5]

log.info("Generating filter")
cascade = FilterCascade.cascade_with_characteristics(
int(len(blocked) * 1.1), fprs)

cascade.version = 1
capacity=int(len(blocked) * 1.1),
error_rates=fprs,
defaultHashAlg=HashAlgorithm.SHA256,
salt=salt,
)
cascade.initialize(include=blocked, exclude=not_blocked)

stats['mlbf_fprs'] = fprs
Expand All @@ -92,5 +93,5 @@ def generate_mlbf(stats, key_format, *, blocked=None, not_blocked=None):
log.debug("Filter cascade layers: {layers}, bit: {bits}".format(
layers=cascade.layerCount(), bits=cascade.bitCount()))

cascade.check(entries=blocked, exclusions=not_blocked)
cascade.verify(include=blocked, exclude=not_blocked)
return cascade
1 change: 0 additions & 1 deletion src/olympia/blocklist/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,4 +278,3 @@ def test_command(self):
call_command('export_blocklist', '1')
out_path = os.path.join(settings.TMP_PATH, 'mlbf', '1')
assert os.path.exists(os.path.join(out_path, 'filter'))
assert os.path.exists(os.path.join(out_path, 'filter.meta'))
10 changes: 3 additions & 7 deletions src/olympia/blocklist/tests/test_cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from olympia.amo.tests import addon_factory, TestCase, user_factory
from olympia.blocklist.cron import MLBF_TIME_CONFIG_KEY, upload_mlbf_to_kinto
from olympia.blocklist.mlbf import get_mlbf_key_format
from olympia.blocklist.mlbf import MLBF_KEY_FORMAT
from olympia.blocklist.models import Block
from olympia.lib.kinto import KintoServer
from olympia.zadmin.models import get_config, set_config
Expand All @@ -22,16 +22,12 @@ def setUp(self):

@freeze_time('2020-01-01 12:34:56')
@override_switch('blocklist_mlbf_submit', active=True)
@mock.patch('olympia.blocklist.cron.get_mlbf_key_format')
@mock.patch.object(KintoServer, 'publish_attachment')
def test_upload_mlbf_to_kinto(self, publish_mock, get_mlbf_key_mock):
key_format = get_mlbf_key_format()
get_mlbf_key_mock.return_value = key_format

def test_upload_mlbf_to_kinto(self, publish_mock):
upload_mlbf_to_kinto()

publish_mock.assert_called_with(
{'key_format': key_format,
{'key_format': MLBF_KEY_FORMAT,
'generation_time':
datetime.datetime(2020, 1, 1, 12, 34, 56).timestamp() * 1000},
('filter.bin', mock.ANY, 'application/octet-stream'))
Expand Down
20 changes: 10 additions & 10 deletions src/olympia/blocklist/tests/test_mlbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
addon_factory, TestCase, user_factory, version_factory)
from olympia.blocklist.models import Block
from olympia.blocklist.mlbf import (
generate_mlbf, get_all_guids, get_blocked_guids, get_mlbf_key_format,
hash_filter_inputs)
generate_mlbf, get_all_guids, get_blocked_guids, hash_filter_inputs,
MLBF_KEY_FORMAT)
from olympia.files.models import File


Expand Down Expand Up @@ -103,14 +103,14 @@ def test_hash_filter_inputs(self):
('guid@', '1.0'),
('foo@baa', '999.223a'),
]
assert hash_filter_inputs(data, get_mlbf_key_format(37872)) == [
'37872:guid@:1.0',
'37872:foo@baa:999.223a',
assert hash_filter_inputs(data) == [
'guid@:1.0',
'foo@baa:999.223a',
]

def test_generate_mlbf(self):
stats = {}
key_format = '{guid}:{version}'
key_format = MLBF_KEY_FORMAT
blocked = [
('guid1@', '1.0'), ('@guid2', '1.0'), ('@guid2', '1.1'),
('guid3@', '0.01b1')]
Expand All @@ -119,16 +119,16 @@ def test_generate_mlbf(self):
('guid30@', '0.01b1'), ('guid100@', '1.0'), ('@guid200', '1.0'),
('@guid200', '1.1'), ('guid300@', '0.01b1')]
bfilter = generate_mlbf(
stats, key_format, blocked=blocked, not_blocked=not_blocked)
stats, blocked=blocked, not_blocked=not_blocked)
for entry in blocked:
key = key_format.format(guid=entry[0], version=entry[1])
assert key in bfilter
for entry in not_blocked:
key = key_format.format(guid=entry[0], version=entry[1])
assert key not in bfilter
assert stats['mlbf_version'] == 1
assert stats['mlbf_version'] == 2
assert stats['mlbf_layers'] == 2
assert stats['mlbf_bits'] == 14409
assert stats['mlbf_bits'] == 14416
with tempfile.NamedTemporaryFile() as out:
bfilter.tofile(out)
assert os.stat(out.name).st_size == 1824
assert os.stat(out.name).st_size == 1842