Skip to content

Commit

Permalink
Implement pure logarithmic grid for candidates search. (#461)
Browse files Browse the repository at this point in the history
  • Loading branch information
RamSaw authored Jun 29, 2023
1 parent 69372bd commit dada1fe
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 78 deletions.
38 changes: 21 additions & 17 deletions analysis/parameter_tuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ class ParametersSearchStrategy(Enum):
# Picks up candidates that correspond to a predefined list of quantiles.
QUANTILES = 1
# Candidates are a sequence starting from 1 where relative difference
# between two neighbouring elements is (almost) the same.
# between two neighbouring elements is the same. Mathematically it means
# that candidates are a sequence a_i, where
# a_i = max_value^(i / (max_candidates - 1)), i in [0..(max_candidates - 1)]
CONSTANT_RELATIVE_STEP = 2


Expand Down Expand Up @@ -204,22 +206,24 @@ def _find_candidates_constant_relative_step(histogram: histograms.Histogram,
max_candidates: int) -> List[int]:
"""Implementation of CONSTANT_RELATIVE_STEP strategy."""
max_value = histogram.max_value
# relative step varies from 1% to 0.1%
# because generate_possible_contribution_bounds generate bounds by changing
# only up to first 3 digits, for example 100000, 101000, 102000... Then
# relative step between neighbouring elements
# varies (101000 - 100000) / 100000 = 0.01 and
# (1000000 - 999000) / 999000 ~= 0.001.
candidates = private_contribution_bounds.generate_possible_contribution_bounds(
max_value)
n_max_without_max_value = max_candidates - 1
if len(candidates) > n_max_without_max_value:
delta = len(candidates) / n_max_without_max_value
candidates = [
candidates[int(i * delta)] for i in range(n_max_without_max_value)
]
if candidates[-1] != max_value:
candidates.append(max_value)
assert max_value >= 1, "max_value has to be >= 1."
max_candidates = min(max_candidates, max_value)
assert max_candidates > 0, "max_candidates have to be positive"
if max_candidates == 1:
return [1]
step = pow(max_value, 1 / (max_candidates - 1))
candidates = [1]
accumulated = 1
for i in range(1, max_candidates):
previous_candidate = candidates[-1]
if previous_candidate >= max_value:
break
accumulated *= step
next_candidate = max(previous_candidate + 1, math.ceil(accumulated))
candidates.append(next_candidate)
# Floating-point calculations might not be precise enough, but the last
# candidate must always be max_value.
candidates[-1] = max_value
return candidates


Expand Down
99 changes: 38 additions & 61 deletions analysis/tests/parameter_tuning_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,42 @@ def test_find_candidate_parameters_more_candidates_for_l_inf_when_not_so_many_l_
self.assertEqual([3, 4, 5, 6, 7, 3, 4, 5, 6, 7],
candidates.max_contributions_per_partition)

def test_find_candidate_parameters_constant_relative_step_strategy_big_n_max(
self):
@parameterized.named_parameters(
dict(testcase_name='max_value=1, returns [1]',
max_value=1,
max_candidates=1000,
expected_candidates=[1]),
dict(testcase_name='max_candidates=1, returns [1]',
max_value=1000,
max_candidates=1,
expected_candidates=[1]),
dict(testcase_name='max_candidates=2, returns 1 and max_value',
max_value=1003,
max_candidates=2,
expected_candidates=[1, 1003]),
dict(testcase_name='max_candidates is equal to max_value, returns '
'all possible candidates',
max_value=10,
max_candidates=10,
expected_candidates=list(range(1, 11))),
dict(
testcase_name='max_candidates is larger than max_value, returns all'
' possible candidates up to max_value',
max_value=10,
max_candidates=100,
expected_candidates=list(range(1, 11))),
dict(
testcase_name='max_candidates is smaller than max_value, returns '
'logarithmic subset of values and last value is '
'max_value',
max_value=1000,
max_candidates=5,
# ceil(1000^(i / 4)), where i in [0, 1, 2, 3, 4]
expected_candidates=[1, 6, 32, 178, 1000]))
def test_find_candidate_parameters_constant_relative_ste_strategy(
self, max_value, max_candidates, expected_candidates):
mock_l0_histogram = histograms.Histogram(None, None)
setattr(histograms.Histogram, 'max_value', 999999)
setattr(histograms.Histogram, 'max_value', max_value)

mock_histograms = histograms.DatasetHistograms(mock_l0_histogram, None,
None, None, None)
Expand All @@ -185,65 +217,10 @@ def test_find_candidate_parameters_constant_relative_step_strategy_big_n_max(
parameters_to_tune,
pipeline_dp.Metrics.COUNT,
ParametersSearchStrategy.CONSTANT_RELATIVE_STEP,
max_candidates=1000)

expected_superset = set(
list(range(1, 1000, 1)) + list(range(1000, 10000, 10)) +
list(range(10000, 100000, 100)) +
list(range(100000, 1000000, 1000))).union({999999})
self.assertTrue(
set(candidates.max_partitions_contributed).issubset(
expected_superset))
self.assertLen(set(candidates.max_partitions_contributed),
len(candidates.max_partitions_contributed))
self.assertLen(candidates.max_partitions_contributed, 1000)
self.assertEqual(sorted(candidates.max_partitions_contributed),
candidates.max_partitions_contributed)

def test_find_candidate_parameters_constant_relative_step_strategy_small_n_max(
self):
mock_linf_histogram = histograms.Histogram(None, None)
setattr(histograms.Histogram, 'max_value', 999999)

mock_histograms = histograms.DatasetHistograms(None, None,
mock_linf_histogram,
None, None)
parameters_to_tune = parameter_tuning.ParametersToTune(
max_partitions_contributed=False,
max_contributions_per_partition=True)

candidates = parameter_tuning._find_candidate_parameters(
mock_histograms,
parameters_to_tune,
pipeline_dp.Metrics.COUNT,
ParametersSearchStrategy.CONSTANT_RELATIVE_STEP,
max_candidates=10)

self.assertEqual(
[1, 412, 823, 3340, 7450, 25600, 66700, 178000, 589000, 999999],
candidates.max_contributions_per_partition)
max_candidates=max_candidates)

def test_find_candidate_parameters_constant_relative_step_strategy_number_of_candidates_returned_is_less_than_maximum_number_of_candidates(
self):
mock_linf_histogram = histograms.Histogram(None, None)
setattr(histograms.Histogram, 'max_value', 50)

mock_histograms = histograms.DatasetHistograms(None, None,
mock_linf_histogram,
None, None)
parameters_to_tune = parameter_tuning.ParametersToTune(
max_partitions_contributed=False,
max_contributions_per_partition=True)

candidates = parameter_tuning._find_candidate_parameters(
mock_histograms,
parameters_to_tune,
pipeline_dp.Metrics.COUNT,
ParametersSearchStrategy.CONSTANT_RELATIVE_STEP,
max_candidates=100)

self.assertEqual(list(range(1, 51)),
candidates.max_contributions_per_partition)
self.assertEqual(expected_candidates,
candidates.max_partitions_contributed)

def test_tune_count(self):
# Arrange.
Expand Down

0 comments on commit dada1fe

Please sign in to comment.