Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement pure logarithmic grid for candidates search. #461

Merged
merged 3 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 21 additions & 17 deletions analysis/parameter_tuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ class ParametersSearchStrategy(Enum):
# Picks up candidates that correspond to a predefined list of quantiles.
QUANTILES = 1
# Candidates are a sequence starting from 1 where relative difference
# between two neighbouring elements is (almost) the same.
# between two neighbouring elements is the same. Mathematically it means
# that candidates are a sequence a_i, where
# a_i = max_value^(i / (max_candidates - 1)), i in [0..(max_candidates - 1)]
CONSTANT_RELATIVE_STEP = 2


Expand Down Expand Up @@ -204,22 +206,24 @@ def _find_candidates_constant_relative_step(histogram: histograms.Histogram,
max_candidates: int) -> List[int]:
"""Implementation of CONSTANT_RELATIVE_STEP strategy."""
max_value = histogram.max_value
# relative step varies from 1% to 0.1%
# because generate_possible_contribution_bounds generate bounds by changing
# only up to first 3 digits, for example 100000, 101000, 102000... Then
# relative step between neighbouring elements
# varies (101000 - 100000) / 100000 = 0.01 and
# (1000000 - 999000) / 999000 ~= 0.001.
candidates = private_contribution_bounds.generate_possible_contribution_bounds(
max_value)
n_max_without_max_value = max_candidates - 1
if len(candidates) > n_max_without_max_value:
delta = len(candidates) / n_max_without_max_value
candidates = [
candidates[int(i * delta)] for i in range(n_max_without_max_value)
]
if candidates[-1] != max_value:
candidates.append(max_value)
assert max_value >= 1, "max_value has to be >= 1."
max_candidates = min(max_candidates, max_value)
assert max_candidates > 0, "max_candidates have to be positive"
if max_candidates == 1:
return [1]
step = pow(max_value, 1 / (max_candidates - 1))
candidates = [1]
accumulated = 1
for i in range(1, max_candidates):
previous_candidate = candidates[-1]
if previous_candidate >= max_value:
break
accumulated *= step
next_candidate = max(previous_candidate + 1, math.ceil(accumulated))
candidates.append(next_candidate)
# Float calculations might not be precise enough, but the last candidate
# always has to be max_value.
candidates[-1] = max_value
return candidates


Expand Down
99 changes: 38 additions & 61 deletions analysis/tests/parameter_tuning_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,42 @@ def test_find_candidate_parameters_more_candidates_for_l_inf_when_not_so_many_l_
self.assertEqual([3, 4, 5, 6, 7, 3, 4, 5, 6, 7],
candidates.max_contributions_per_partition)

def test_find_candidate_parameters_constant_relative_step_strategy_big_n_max(
self):
@parameterized.named_parameters(
dict(testcase_name='max_value=1, returns [1]',
max_value=1,
max_candidates=1000,
expected_candidates=[1]),
dict(testcase_name='max_candidates=1, returns [1]',
max_value=1000,
max_candidates=1,
expected_candidates=[1]),
dict(testcase_name='max_candidates=2, returns 1 and max_value',
max_value=1003,
max_candidates=2,
expected_candidates=[1, 1003]),
dict(testcase_name='max_candidates is equal to max_value, returns '
'all possible candidates',
max_value=10,
max_candidates=10,
expected_candidates=list(range(1, 11))),
dict(
testcase_name='max_candidates is larger than max_value, returns all'
' possible candidates up to max_value',
max_value=10,
max_candidates=100,
expected_candidates=list(range(1, 11))),
dict(
testcase_name='max_candidates is smaller than max_value, returns '
'logarithmic subset of values and last value is '
'max_value',
max_value=1000,
max_candidates=5,
# ceil(1000^(i / 4)), where i in [0, 1, 2, 3, 4]
expected_candidates=[1, 6, 32, 178, 1000]))
def test_find_candidate_parameters_constant_relative_strategy(
self, max_value, max_candidates, expected_candidates):
mock_l0_histogram = histograms.Histogram(None, None)
setattr(histograms.Histogram, 'max_value', 999999)
setattr(histograms.Histogram, 'max_value', max_value)

mock_histograms = histograms.DatasetHistograms(mock_l0_histogram, None,
None, None, None)
Expand All @@ -185,65 +217,10 @@ def test_find_candidate_parameters_constant_relative_step_strategy_big_n_max(
parameters_to_tune,
pipeline_dp.Metrics.COUNT,
ParametersSearchStrategy.CONSTANT_RELATIVE_STEP,
max_candidates=1000)

expected_superset = set(
list(range(1, 1000, 1)) + list(range(1000, 10000, 10)) +
list(range(10000, 100000, 100)) +
list(range(100000, 1000000, 1000))).union({999999})
self.assertTrue(
set(candidates.max_partitions_contributed).issubset(
expected_superset))
self.assertLen(set(candidates.max_partitions_contributed),
len(candidates.max_partitions_contributed))
self.assertLen(candidates.max_partitions_contributed, 1000)
self.assertEqual(sorted(candidates.max_partitions_contributed),
candidates.max_partitions_contributed)

def test_find_candidate_parameters_constant_relative_step_strategy_small_n_max(
self):
mock_linf_histogram = histograms.Histogram(None, None)
setattr(histograms.Histogram, 'max_value', 999999)

mock_histograms = histograms.DatasetHistograms(None, None,
mock_linf_histogram,
None, None)
parameters_to_tune = parameter_tuning.ParametersToTune(
max_partitions_contributed=False,
max_contributions_per_partition=True)

candidates = parameter_tuning._find_candidate_parameters(
mock_histograms,
parameters_to_tune,
pipeline_dp.Metrics.COUNT,
ParametersSearchStrategy.CONSTANT_RELATIVE_STEP,
max_candidates=10)

self.assertEqual(
[1, 412, 823, 3340, 7450, 25600, 66700, 178000, 589000, 999999],
candidates.max_contributions_per_partition)
max_candidates=max_candidates)

def test_find_candidate_parameters_constant_relative_step_strategy_number_of_candidates_returned_is_less_than_maximum_number_of_candidates(
self):
mock_linf_histogram = histograms.Histogram(None, None)
setattr(histograms.Histogram, 'max_value', 50)

mock_histograms = histograms.DatasetHistograms(None, None,
mock_linf_histogram,
None, None)
parameters_to_tune = parameter_tuning.ParametersToTune(
max_partitions_contributed=False,
max_contributions_per_partition=True)

candidates = parameter_tuning._find_candidate_parameters(
mock_histograms,
parameters_to_tune,
pipeline_dp.Metrics.COUNT,
ParametersSearchStrategy.CONSTANT_RELATIVE_STEP,
max_candidates=100)

self.assertEqual(list(range(1, 51)),
candidates.max_contributions_per_partition)
self.assertEqual(expected_candidates,
candidates.max_partitions_contributed)

def test_tune_count_new(self):
# Arrange.
Expand Down
Loading