Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split diff by block type #22830

Merged
merged 5 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/olympia/blocklist/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from olympia.zadmin.models import get_config

from .mlbf import MLBF
from .models import Block, BlocklistSubmission
from .models import Block, BlocklistSubmission, BlockType
from .tasks import cleanup_old_files, process_blocklistsubmission, upload_filter
from .utils import datetime_to_ts

Expand Down Expand Up @@ -89,7 +89,9 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
else base_filter
)

changes_count = mlbf.blocks_changed_since_previous(previous_filter)
changes_count = mlbf.blocks_changed_since_previous(
BlockType.BLOCKED, previous_filter
)
statsd.incr(
'blocklist.cron.upload_mlbf_to_remote_settings.blocked_changed', changes_count
)
Expand Down Expand Up @@ -119,7 +121,8 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
force_base
or base_filter is None
or previous_filter is None
or mlbf.blocks_changed_since_previous(base_filter) > BASE_REPLACE_THRESHOLD
or mlbf.blocks_changed_since_previous(BlockType.BLOCKED, base_filter)
> BASE_REPLACE_THRESHOLD
)

if make_base_filter:
Expand Down
72 changes: 47 additions & 25 deletions src/olympia/blocklist/mlbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import secrets
from enum import Enum
from typing import List, Optional, Set, Tuple
from typing import Dict, List, Optional, Tuple

from django.utils.functional import cached_property

Expand All @@ -19,6 +19,16 @@
log = olympia.core.logger.getLogger('z.amo.blocklist')


def ordered_diff_lists(
previous: List[str], current: List[str]
) -> Tuple[List[str], List[str], int]:
# Use lists instead of sets to maintain order
extras = [x for x in current if x not in previous]
deletes = [x for x in previous if x not in current]
changed_count = len(extras) + len(deletes)
return extras, deletes, changed_count


def generate_mlbf(stats, blocked, not_blocked):
log.info('Starting to generating bloomfilter')

Expand Down Expand Up @@ -123,8 +133,17 @@ def __init__(self, *args, **kwargs):

@cached_property
def _all_blocks(self):
return BlockVersion.objects.filter(version__file__is_signed=True).values_list(
'block__guid', 'version__version', 'version_id', 'block_type', named=True
return (
BlockVersion.objects.filter(version__file__is_signed=True)
.distinct()
.order_by('id')
.values_list(
'block__guid',
'version__version',
'version_id',
'block_type',
named=True,
)
)

def _format_blocks(self, block_type: BlockType) -> List[str]:
Expand All @@ -148,16 +167,19 @@ def soft_blocked_items(self) -> List[str]:
def not_blocked_items(self) -> List[str]:
all_blocks_ids = [version.version_id for version in self._all_blocks]
not_blocked_items = MLBF.hash_filter_inputs(
Version.unfiltered.exclude(id__in=all_blocks_ids or ()).values_list(
'addon__addonguid__guid', 'version'
)
Version.unfiltered.exclude(id__in=all_blocks_ids or ())
.distinct()
.order_by('id')
.values_list('addon__addonguid__guid', 'version')
)
# even though we exclude all the version ids in the query there's an
# edge case where the version string occurs twice for an addon so we
# ensure not_blocked_items contain no blocked_items or soft_blocked_items.
return list(
set(not_blocked_items) - set(self.blocked_items + self.soft_blocked_items)
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
)
return [
item
for item in not_blocked_items
if item not in self.blocked_items + self.soft_blocked_items
KevinMind marked this conversation as resolved.
Show resolved Hide resolved
]


class MLBF:
Expand Down Expand Up @@ -213,33 +235,33 @@ def generate_and_write_filter(self):

def generate_diffs(
self, previous_mlbf: 'MLBF' = None
) -> Tuple[Set[str], Set[str], int]:
previous = set(
[] if previous_mlbf is None else previous_mlbf.data.blocked_items
)
current = set(self.data.blocked_items)
extras = current - previous
deletes = previous - current
changed_count = (
len(extras) + len(deletes) if len(previous) > 0 else len(current)
)
return extras, deletes, changed_count
) -> Dict[BlockType, Tuple[List[str], List[str], int]]:
return {
block_type: ordered_diff_lists(
[] if previous_mlbf is None else previous_mlbf.data[block_type],
self.data[block_type],
)
for block_type in BlockType
}

def generate_and_write_stash(self, previous_mlbf: 'MLBF' = None):
# compare previous with current blocks
extras, deletes, _ = self.generate_diffs(previous_mlbf)
extras, deletes, _ = self.generate_diffs(previous_mlbf)[BlockType.BLOCKED]
stash_json = {
'blocked': list(extras),
'unblocked': list(deletes),
'blocked': extras,
'unblocked': deletes,
}
# write stash
stash_path = self.stash_path
with self.storage.open(stash_path, 'w') as json_file:
log.info(f'Writing to file {stash_path}')
json.dump(stash_json, json_file)

def blocks_changed_since_previous(self, previous_mlbf: 'MLBF' = None):
return self.generate_diffs(previous_mlbf)[2]
def blocks_changed_since_previous(
self, block_type: BlockType = BlockType.BLOCKED, previous_mlbf: 'MLBF' = None
):
_, _, changed_count = self.generate_diffs(previous_mlbf)[block_type]
return changed_count

@classmethod
def load_from_storage(
Expand Down
Loading
Loading